diff --git a/docs/automation/hooks.md b/docs/automation/hooks.md index 2129e98bef7..75cfc2d81b2 100644 --- a/docs/automation/hooks.md +++ b/docs/automation/hooks.md @@ -139,7 +139,7 @@ Hooks are discovered from these directories, in order of increasing override pre Workspace hooks can add new hook names but cannot override bundled, managed, or plugin-provided hooks with the same name. -The Gateway skips internal hook discovery on startup until internal hooks are configured. Enable a bundled or managed hook with `openclaw hooks enable <name>`, install a hook pack, or set `hooks.internal.enabled=true` to opt in. +The Gateway skips internal hook discovery on startup until internal hooks are configured. Enable a bundled or managed hook with `openclaw hooks enable <name>`, install a hook pack, or set `hooks.internal.enabled=true` to opt in. When you enable one named hook, the Gateway loads only that hook's handler; `hooks.internal.enabled=true`, extra hook directories, and legacy handlers opt into broad discovery. ### Hook packs diff --git a/docs/cli/gateway.md b/docs/cli/gateway.md index 6f8dad312f5..b838421dcf4 100644 --- a/docs/cli/gateway.md +++ b/docs/cli/gateway.md @@ -63,6 +63,11 @@ Notes: - `--raw-stream`: log raw model stream events to jsonl. - `--raw-stream-path <path>`: raw stream jsonl path. +Startup profiling: + +- Set `OPENCLAW_GATEWAY_STARTUP_TRACE=1` to log phase timings during Gateway startup. +- Run `pnpm test:startup:gateway -- --runs 5 --warmup 1` to benchmark Gateway startup. The benchmark records first process output, `/healthz`, `/readyz`, and startup trace timings. + ## Query a running Gateway All query commands use WebSocket RPC. @@ -90,6 +95,8 @@ Pass `--token` or `--password` explicitly. Missing explicit credentials is an er openclaw gateway health --url ws://127.0.0.1:18789 ``` +The HTTP `/healthz` endpoint is a liveness probe: it returns once the server can answer HTTP. 
The HTTP `/readyz` endpoint is stricter and stays red while startup sidecars, channels, or configured hooks are still settling. + ### `gateway usage-cost` Fetch usage-cost summaries from session logs. diff --git a/package.json b/package.json index e45f33a090f..6a7ddba188d 100644 --- a/package.json +++ b/package.json @@ -1483,6 +1483,7 @@ "test:startup:bench:save": "node --import tsx scripts/bench-cli-startup.ts --preset all --runs 5 --warmup 1 --output .artifacts/cli-startup-bench-all.json", "test:startup:bench:smoke": "node --import tsx scripts/bench-cli-startup.ts --preset real --case gatewayStatusJson --runs 1 --warmup 0 --output .artifacts/cli-startup-bench-smoke.json", "test:startup:bench:update": "node scripts/test-update-cli-startup-bench.mjs", + "test:startup:gateway": "node --import tsx scripts/bench-gateway-startup.ts", "test:startup:memory": "node scripts/check-cli-startup-memory.mjs", "test:ui": "pnpm ui:i18n:check && pnpm lint:ui:no-raw-window-open && pnpm --dir ui test", "test:unit": "pnpm test:unit:fast && node scripts/run-vitest.mjs run --config test/vitest/vitest.unit.config.ts", diff --git a/scripts/bench-gateway-startup.ts b/scripts/bench-gateway-startup.ts new file mode 100644 index 00000000000..1019988d8df --- /dev/null +++ b/scripts/bench-gateway-startup.ts @@ -0,0 +1,627 @@ +import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { request } from "node:http"; +import { createServer } from "node:net"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { performance } from "node:perf_hooks"; + +type GatewayBenchCase = { + config: Record; + env?: Record; + id: string; + name: string; + pluginCount?: number; +}; + +type ProbeResult = { + ms: number | null; + status: number | null; +}; + +type GatewaySample = { + exitCode: number | null; + firstOutputMs: number | null; + healthz: ProbeResult; + outputTail: string; + 
readyLogMs: number | null; + readyz: ProbeResult; + signal: string | null; + startupTrace: Record; +}; + +type SummaryStats = { + avg: number; + max: number; + min: number; + p50: number; + p95: number; +}; + +type CaseResult = { + id: string; + name: string; + samples: GatewaySample[]; + summary: { + firstOutputMs: SummaryStats | null; + healthzMs: SummaryStats | null; + readyLogMs: SummaryStats | null; + readyzMs: SummaryStats | null; + startupTrace: Record; + }; +}; + +type CliOptions = { + cases: GatewayBenchCase[]; + entry: string; + json: boolean; + output?: string; + runs: number; + timeoutMs: number; + warmup: number; +}; + +const DEFAULT_RUNS = 5; +const DEFAULT_WARMUP = 1; +const DEFAULT_TIMEOUT_MS = 30_000; +const DEFAULT_ENTRY = "dist/entry.js"; + +const BASE_CONFIG = { + browser: { enabled: false }, + gateway: { + mode: "local", + bind: "loopback", + auth: { mode: "none" }, + controlUi: { enabled: false }, + tailscale: { mode: "off" }, + }, + plugins: { + enabled: true, + entries: { + browser: { enabled: false }, + }, + }, +} satisfies Record; + +const GATEWAY_CASES: readonly GatewayBenchCase[] = [ + { + id: "default", + name: "gateway default", + config: BASE_CONFIG, + }, + { + id: "skipChannels", + name: "gateway, skip channels", + env: { OPENCLAW_SKIP_CHANNELS: "1" }, + config: BASE_CONFIG, + }, + { + id: "oneInternalHook", + name: "gateway, one configured internal hook", + env: { OPENCLAW_SKIP_CHANNELS: "1" }, + config: { + ...BASE_CONFIG, + hooks: { + internal: { + entries: { + "session-memory": { enabled: true }, + }, + }, + }, + }, + }, + { + id: "allInternalHooks", + name: "gateway, all internal hooks", + env: { OPENCLAW_SKIP_CHANNELS: "1" }, + config: { + ...BASE_CONFIG, + hooks: { + internal: { + enabled: true, + }, + }, + }, + }, + { + id: "fiftyPlugins", + name: "gateway, 50 manifest plugins", + env: { OPENCLAW_SKIP_CHANNELS: "1" }, + pluginCount: 50, + config: BASE_CONFIG, + }, +] as const; + +function parseFlagValue(flag: string): string 
| undefined { + const index = process.argv.indexOf(flag); + if (index === -1) { + return undefined; + } + return process.argv[index + 1]; +} + +function hasFlag(flag: string): boolean { + return process.argv.includes(flag); +} + +function parseRepeatableFlag(flag: string): string[] { + const values: string[] = []; + for (let index = 0; index < process.argv.length; index += 1) { + if (process.argv[index] === flag && process.argv[index + 1]) { + values.push(process.argv[index + 1]); + } + } + return values; +} + +function parsePositiveInt(raw: string | undefined, fallback: number): number { + if (!raw) { + return fallback; + } + const parsed = Number.parseInt(raw, 10); + if (!Number.isFinite(parsed) || parsed < 0) { + return fallback; + } + return parsed; +} + +function resolveCases(caseIds: string[]): GatewayBenchCase[] { + if (caseIds.length === 0) { + return [...GATEWAY_CASES]; + } + const byId = new Map(GATEWAY_CASES.map((benchCase) => [benchCase.id, benchCase])); + return caseIds.map((id) => { + const benchCase = byId.get(id); + if (!benchCase) { + throw new Error(`Unknown --case "${id}"`); + } + return benchCase; + }); +} + +function parseOptions(): CliOptions { + return { + cases: resolveCases(parseRepeatableFlag("--case")), + entry: parseFlagValue("--entry") ?? DEFAULT_ENTRY, + json: hasFlag("--json"), + output: parseFlagValue("--output"), + runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS), + timeoutMs: parsePositiveInt(parseFlagValue("--timeout-ms"), DEFAULT_TIMEOUT_MS), + warmup: parsePositiveInt(parseFlagValue("--warmup"), DEFAULT_WARMUP), + }; +} + +function median(values: number[]): number { + const sorted = [...values].toSorted((a, b) => a - b); + const middle = Math.floor(sorted.length / 2); + if (sorted.length % 2 === 0) { + return (sorted[middle - 1] + sorted[middle]) / 2; + } + return sorted[middle] ?? 
0; +} + +function percentile(values: number[], p: number): number { + const sorted = [...values].toSorted((a, b) => a - b); + const index = Math.min(sorted.length - 1, Math.floor((p / 100) * sorted.length)); + return sorted[index] ?? 0; +} + +function summarizeNumbers(values: number[]): SummaryStats | null { + if (values.length === 0) { + return null; + } + const total = values.reduce((sum, value) => sum + value, 0); + return { + avg: total / values.length, + max: Math.max(...values), + min: Math.min(...values), + p50: median(values), + p95: percentile(values, 95), + }; +} + +function summarizeCase(benchCase: GatewayBenchCase, samples: GatewaySample[]): CaseResult { + const startupTraceKeys = new Set(); + for (const sample of samples) { + for (const key of Object.keys(sample.startupTrace)) { + startupTraceKeys.add(key); + } + } + const startupTrace: Record = {}; + for (const key of [...startupTraceKeys].toSorted()) { + const stats = summarizeNumbers( + samples + .map((sample) => sample.startupTrace[key]) + .filter((value): value is number => typeof value === "number"), + ); + if (stats) { + startupTrace[key] = stats; + } + } + return { + id: benchCase.id, + name: benchCase.name, + samples, + summary: { + firstOutputMs: summarizeNumbers( + samples + .map((sample) => sample.firstOutputMs) + .filter((value): value is number => typeof value === "number"), + ), + healthzMs: summarizeNumbers( + samples + .map((sample) => sample.healthz.ms) + .filter((value): value is number => typeof value === "number"), + ), + readyLogMs: summarizeNumbers( + samples + .map((sample) => sample.readyLogMs) + .filter((value): value is number => typeof value === "number"), + ), + readyzMs: summarizeNumbers( + samples + .map((sample) => sample.readyz.ms) + .filter((value): value is number => typeof value === "number"), + ), + startupTrace, + }, + }; +} + +function formatMs(value: number | null): string { + if (value == null) { + return "n/a"; + } + return `${value.toFixed(1)}ms`; +} + 
+function formatStats(stats: SummaryStats | null): string { + if (!stats) { + return "n/a"; + } + return `p50=${formatMs(stats.p50)} avg=${formatMs(stats.avg)} min=${formatMs(stats.min)} max=${formatMs(stats.max)}`; +} + +async function getFreePort(): Promise { + return new Promise((resolve, reject) => { + const server = createServer(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + server.close(() => reject(new Error("failed to allocate port"))); + return; + } + const { port } = address; + server.close(() => resolve(port)); + }); + }); +} + +async function waitForProbe(params: { + deadlineAt: number; + isDone?: () => boolean; + path: string; + port: number; + startAt: number; +}): Promise { + let lastStatus: number | null = null; + while (performance.now() < params.deadlineAt) { + if (params.isDone?.()) { + break; + } + const status = await requestStatus(params.port, params.path).catch(() => null); + lastStatus = status; + if (status === 200) { + return { ms: performance.now() - params.startAt, status }; + } + await delay(25); + } + return { ms: null, status: lastStatus }; +} + +function requestStatus(port: number, pathname: string): Promise { + return new Promise((resolve, reject) => { + const req = request( + { host: "127.0.0.1", method: "GET", path: pathname, port, timeout: 1000 }, + (res) => { + res.resume(); + res.on("end", () => resolve(res.statusCode ?? 
0)); + }, + ); + req.on("error", reject); + req.on("timeout", () => { + req.destroy(new Error("probe timeout")); + }); + req.end(); + }); +} + +function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function writePluginFixtures(root: string, count: number): string[] { + const files: string[] = []; + const pluginsDir = path.join(root, "plugins"); + mkdirSync(pluginsDir, { recursive: true }); + for (let index = 0; index < count; index += 1) { + const id = `bench-plugin-${String(index + 1).padStart(2, "0")}`; + const pluginDir = path.join(pluginsDir, id); + mkdirSync(pluginDir, { recursive: true }); + const entry = path.join(pluginDir, "index.cjs"); + writeFileSync(entry, `module.exports = { id: ${JSON.stringify(id)}, register() {} };\n`); + writeFileSync( + path.join(pluginDir, "openclaw.plugin.json"), + `${JSON.stringify({ id, configSchema: { type: "object", additionalProperties: false } }, null, 2)}\n`, + ); + files.push(entry); + } + return files; +} + +function writeConfig(root: string, benchCase: GatewayBenchCase): string { + const pluginPaths = benchCase.pluginCount ? writePluginFixtures(root, benchCase.pluginCount) : []; + const config = { + ...benchCase.config, + plugins: { + ...(benchCase.config.plugins as Record | undefined), + ...(pluginPaths.length > 0 + ? { + load: { paths: pluginPaths }, + allow: pluginPaths.map((file) => path.basename(path.dirname(file))), + } + : {}), + }, + }; + const configPath = path.join(root, "openclaw.json"); + writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`); + return configPath; +} + +function sanitizedEnv( + root: string, + configPath: string, + benchCase: GatewayBenchCase, +): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { + CI: process.env.CI ?? "1", + HOME: root, + LANG: process.env.LANG ?? "en_US.UTF-8", + LOGNAME: process.env.LOGNAME ?? 
"openclaw-bench", + NO_COLOR: "1", + PATH: process.env.PATH, + SHELL: process.env.SHELL, + TMPDIR: process.env.TMPDIR, + USER: process.env.USER ?? "openclaw-bench", + npm_config_update_notifier: "false", + OPENCLAW_CONFIG: configPath, + OPENCLAW_CONFIG_PATH: configPath, + OPENCLAW_GATEWAY_STARTUP_TRACE: "1", + OPENCLAW_HOME: root, + OPENCLAW_LOCAL_CHECK: "0", + OPENCLAW_STATE_DIR: path.join(root, "state"), + OPENCLAW_TEST_DISABLE_UPDATE_CHECK: "1", + ...benchCase.env, + }; + return env; +} + +async function stopChild(child: ChildProcessWithoutNullStreams): Promise<{ + exitCode: number | null; + signal: string | null; +}> { + if (child.exitCode != null || child.signalCode != null) { + return { exitCode: child.exitCode, signal: child.signalCode }; + } + const exited = new Promise<{ exitCode: number | null; signal: string | null }>((resolve) => { + child.once("exit", (exitCode, signal) => resolve({ exitCode, signal })); + }); + killProcessTree(child, "SIGTERM"); + const timeout = delay(2000).then(() => { + if (child.exitCode == null && child.signalCode == null) { + killProcessTree(child, "SIGKILL"); + } + return exited; + }); + return Promise.race([exited, timeout]); +} + +function killProcessTree(child: ChildProcessWithoutNullStreams, signal: NodeJS.Signals): void { + if (process.platform !== "win32" && child.pid !== undefined) { + try { + process.kill(-child.pid, signal); + return; + } catch { + // Fall back to the direct child below. 
+ } + } + child.kill(signal); +} + +function collectStartupTrace(line: string, startupTrace: Record): void { + const match = /startup trace: ([^ ]+) ([0-9.]+)ms total=([0-9.]+)ms/u.exec(line); + if (!match) { + return; + } + startupTrace[match[1]] = Number(match[2]); + startupTrace[`${match[1]}.total`] = Number(match[3]); +} + +async function runGatewaySample(options: { + benchCase: GatewayBenchCase; + entry: string; + timeoutMs: number; +}): Promise { + const root = mkdtempSync(path.join(tmpdir(), "openclaw-gateway-bench-")); + const port = await getFreePort(); + const configPath = writeConfig(root, options.benchCase); + const env = sanitizedEnv(root, configPath, options.benchCase); + const startAt = performance.now(); + const deadlineAt = startAt + options.timeoutMs; + const startupTrace: Record = {}; + const output: string[] = []; + let firstOutputMs: number | null = null; + let readyLogMs: number | null = null; + let childExited = false; + + const child = spawn( + process.execPath, + [ + options.entry, + "gateway", + "run", + "--port", + String(port), + "--bind", + "loopback", + "--auth", + "none", + "--tailscale", + "off", + "--allow-unconfigured", + ], + { cwd: process.cwd(), detached: process.platform !== "win32", env }, + ); + const childExitPromise = new Promise<{ exitCode: number | null; signal: string | null }>( + (resolve) => { + child.once("exit", (exitCode, signal) => { + childExited = true; + resolve({ exitCode, signal }); + }); + }, + ); + + const onChunk = (chunk: Buffer) => { + if (firstOutputMs == null) { + firstOutputMs = performance.now() - startAt; + } + const text = chunk.toString("utf8"); + output.push(text); + if (output.length > 20) { + output.splice(0, output.length - 20); + } + for (const line of text.split(/\r?\n/u)) { + if (line.includes("ready (") && readyLogMs == null) { + readyLogMs = performance.now() - startAt; + } + collectStartupTrace(line, startupTrace); + } + }; + child.stdout.on("data", onChunk); + child.stderr.on("data", 
onChunk); + + const healthz = await waitForProbe({ + deadlineAt, + isDone: () => childExited, + path: "/healthz", + port, + startAt, + }); + const readyz = await waitForProbe({ + deadlineAt, + isDone: () => childExited, + path: "/readyz", + port, + startAt, + }); + const exit = await stopChild(child); + await childExitPromise.catch(() => null); + rmSync(root, { force: true, maxRetries: 3, recursive: true, retryDelay: 100 }); + + return { + exitCode: exit.exitCode, + firstOutputMs, + healthz, + outputTail: output.join("").split(/\r?\n/u).slice(-20).join("\n"), + readyLogMs, + readyz, + signal: exit.signal, + startupTrace, + }; +} + +async function runCase(options: { + benchCase: GatewayBenchCase; + entry: string; + runs: number; + timeoutMs: number; + warmup: number; +}): Promise { + const samples: GatewaySample[] = []; + const total = options.runs + options.warmup; + for (let index = 0; index < total; index += 1) { + const sample = await runGatewaySample({ + benchCase: options.benchCase, + entry: options.entry, + timeoutMs: options.timeoutMs, + }); + if (index >= options.warmup) { + samples.push(sample); + console.log( + `[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)}`, + ); + } else { + console.log( + `[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)}`, + ); + } + } + return summarizeCase(options.benchCase, samples); +} + +function printResult(result: CaseResult): void { + console.log(`\n${result.name} (${result.id})`); + console.log(` first output: ${formatStats(result.summary.firstOutputMs)}`); + console.log(` /healthz: ${formatStats(result.summary.healthzMs)}`); + console.log(` ready log: ${formatStats(result.summary.readyLogMs)}`); + console.log(` /readyz: 
${formatStats(result.summary.readyzMs)}`); + const trace = Object.entries(result.summary.startupTrace) + .filter(([name]) => !name.endsWith(".total")) + .toSorted((a, b) => (b[1].avg ?? 0) - (a[1].avg ?? 0)) + .slice(0, 8); + if (trace.length > 0) { + console.log(" trace top:"); + for (const [name, stats] of trace) { + console.log(` ${name}: ${formatStats(stats)}`); + } + } +} + +async function main() { + const options = parseOptions(); + const results: CaseResult[] = []; + for (const benchCase of options.cases) { + results.push( + await runCase({ + benchCase, + entry: options.entry, + runs: options.runs, + timeoutMs: options.timeoutMs, + warmup: options.warmup, + }), + ); + } + + const payload = { + entry: options.entry, + generatedAt: new Date().toISOString(), + results, + }; + if (options.output) { + mkdirSync(path.dirname(options.output), { recursive: true }); + writeFileSync(options.output, `${JSON.stringify(payload, null, 2)}\n`); + } + if (options.json) { + console.log(JSON.stringify(payload, null, 2)); + return; + } + for (const result of results) { + printResult(result); + } +} + +main().catch((err) => { + console.error(err instanceof Error ? 
err.stack : String(err)); + process.exitCode = 1; +}); diff --git a/src/gateway/server-startup-config.secrets.test.ts b/src/gateway/server-startup-config.secrets.test.ts index f2631263049..d11475be4cd 100644 --- a/src/gateway/server-startup-config.secrets.test.ts +++ b/src/gateway/server-startup-config.secrets.test.ts @@ -298,6 +298,38 @@ describe("gateway startup config secret preflight", () => { expect(activateRuntimeSecretsSnapshot).toHaveBeenCalledTimes(1); }); + it("skips inactive gateway auth secret preflight when auth has plain strings", async () => { + const prepareRuntimeSecretsSnapshot = vi.fn(async ({ config }) => preparedSnapshot(config)); + const result = await prepareGatewayStartupConfig({ + configSnapshot: buildSnapshot(gatewayTokenConfig({})), + activateRuntimeSecrets: createRuntimeSecretsActivator({ + logSecrets: { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }, + emitStateEvent: vi.fn(), + prepareRuntimeSecretsSnapshot, + activateRuntimeSecretsSnapshot: vi.fn(), + }), + }); + + expect(result.auth).toMatchObject({ + mode: "token", + token: "startup-test-token", + }); + expect(prepareRuntimeSecretsSnapshot).toHaveBeenCalledTimes(1); + expect(prepareRuntimeSecretsSnapshot).toHaveBeenCalledWith({ + config: expect.objectContaining({ + gateway: expect.objectContaining({ + auth: expect.objectContaining({ + token: "startup-test-token", + }), + }), + }), + }); + }); + it("uses gateway auth strings resolved during startup preflight for bootstrap auth", async () => { const prepareRuntimeSecretsSnapshot = vi.fn(async ({ config }) => preparedSnapshot({ diff --git a/src/gateway/server-startup-config.ts b/src/gateway/server-startup-config.ts index 516b1f2d34c..adb35b3052b 100644 --- a/src/gateway/server-startup-config.ts +++ b/src/gateway/server-startup-config.ts @@ -209,12 +209,15 @@ export async function prepareGatewayStartupConfig(params: { auth: params.authOverride, tailscale: params.tailscaleOverride, }); - const preflightConfig = ( - await 
params.activateRuntimeSecrets(startupPreflightConfig, { - reason: "startup", - activate: false, - }) - ).config; + const needsAuthSecretPreflight = hasActiveGatewayAuthSecretRef(startupPreflightConfig); + const preflightConfig = needsAuthSecretPreflight + ? ( + await params.activateRuntimeSecrets(startupPreflightConfig, { + reason: "startup", + activate: false, + }) + ).config + : startupPreflightConfig; const preflightAuthOverride = typeof preflightConfig.gateway?.auth?.token === "string" || typeof preflightConfig.gateway?.auth?.password === "string" @@ -253,6 +256,18 @@ export async function prepareGatewayStartupConfig(params: { }; } +function hasActiveGatewayAuthSecretRef(config: OpenClawConfig): boolean { + const states = evaluateGatewayAuthSurfaceStates({ + config, + defaults: config.secrets?.defaults, + env: process.env, + }); + return GATEWAY_AUTH_SURFACE_PATHS.some((path) => { + const state = states[path]; + return state.hasSecretRef && state.active; + }); +} + function pruneSkippedStartupSecretSurfaces(config: OpenClawConfig): OpenClawConfig { const skipChannels = isTruthyEnvValue(process.env.OPENCLAW_SKIP_CHANNELS) || diff --git a/src/gateway/server-startup-plugins.ts b/src/gateway/server-startup-plugins.ts index 91c1a7ef8d0..8fc07df1928 100644 --- a/src/gateway/server-startup-plugins.ts +++ b/src/gateway/server-startup-plugins.ts @@ -36,16 +36,18 @@ export async function prepareGatewayPluginBootstrap(params: { : params.cfgAtStart; if (!params.minimalTestGateway) { - await runChannelPluginStartupMaintenance({ - cfg: startupMaintenanceConfig, - env: process.env, - log: params.log, - }); - await runStartupSessionMigration({ - cfg: params.cfgAtStart, - env: process.env, - log: params.log, - }); + await Promise.all([ + runChannelPluginStartupMaintenance({ + cfg: startupMaintenanceConfig, + env: process.env, + log: params.log, + }), + runStartupSessionMigration({ + cfg: params.cfgAtStart, + env: process.env, + log: params.log, + }), + ]); } 
initSubagentRegistry(); diff --git a/src/gateway/server-startup-post-attach.test.ts b/src/gateway/server-startup-post-attach.test.ts index b7729ad9af3..eb5f18f5128 100644 --- a/src/gateway/server-startup-post-attach.test.ts +++ b/src/gateway/server-startup-post-attach.test.ts @@ -130,12 +130,17 @@ describe("startGatewayPostAttachRuntime", () => { it("re-enables startup-gated methods after post-attach sidecars start", async () => { const unavailableGatewayMethods = new Set(["chat.history", "models.list"]); + const onSidecarsReady = vi.fn(); await startGatewayPostAttachRuntime({ ...createPostAttachParams(), unavailableGatewayMethods, + onSidecarsReady, }); + await vi.waitFor(() => { + expect(onSidecarsReady).toHaveBeenCalledTimes(1); + }); expect([...unavailableGatewayMethods]).toEqual([]); expect(hoisted.startPluginServices).toHaveBeenCalledTimes(1); expect(hoisted.loadInternalHooks).not.toHaveBeenCalled(); @@ -155,7 +160,7 @@ describe("startGatewayPostAttachRuntime", () => { }); const unavailableGatewayMethods = new Set(STARTUP_UNAVAILABLE_GATEWAY_METHODS); - const startup = startGatewayPostAttachRuntime( + await startGatewayPostAttachRuntime( { ...createPostAttachParams(), unavailableGatewayMethods, @@ -174,8 +179,9 @@ describe("startGatewayPostAttachRuntime", () => { expect(hoisted.startPluginServices).not.toHaveBeenCalled(); resumeSidecars(); - await startup; - + await vi.waitFor(() => { + expect([...unavailableGatewayMethods]).toEqual([]); + }); expect([...unavailableGatewayMethods]).toEqual([]); expect(startGatewaySidecars).toHaveBeenCalledTimes(1); }); diff --git a/src/gateway/server-startup-post-attach.ts b/src/gateway/server-startup-post-attach.ts index 0020531ce5b..4800517eaf1 100644 --- a/src/gateway/server-startup-post-attach.ts +++ b/src/gateway/server-startup-post-attach.ts @@ -11,7 +11,7 @@ import { GATEWAY_EVENT_UPDATE_AVAILABLE, type GatewayUpdateAvailableEventPayload, } from "./events.js"; -import { logGatewayStartup } from 
"./server-startup-log.js"; +import type { logGatewayStartup } from "./server-startup-log.js"; import { STARTUP_UNAVAILABLE_GATEWAY_METHODS } from "./server-startup-unavailable-methods.js"; import type { startGatewayTailscaleExposure } from "./server-tailscale.js"; @@ -269,7 +269,7 @@ type Awaitable = T | Promise; type GatewayPostAttachRuntimeDeps = { getGlobalHookRunner: () => Awaitable>; - logGatewayStartup: typeof logGatewayStartup; + logGatewayStartup: (params: Parameters[0]) => Awaitable; scheduleGatewayUpdateCheck: ( ...args: Parameters ) => Awaitable>; @@ -282,7 +282,8 @@ type GatewayPostAttachRuntimeDeps = { const defaultGatewayPostAttachRuntimeDeps: GatewayPostAttachRuntimeDeps = { getGlobalHookRunner: async () => (await import("../plugins/hook-runner-global.js")).getGlobalHookRunner(), - logGatewayStartup, + logGatewayStartup: async (params) => + (await import("./server-startup-log.js")).logGatewayStartup(params), scheduleGatewayUpdateCheck: async (...args) => (await import("../infra/update-startup.js")).scheduleGatewayUpdateCheck(...args), startGatewaySidecars, @@ -326,10 +327,12 @@ export async function startGatewayPostAttachRuntime( }; logChannels: { info: (msg: string) => void; error: (msg: string) => void }; unavailableGatewayMethods: Set; + onPluginServices?: (pluginServices: PluginServicesHandle | null) => void; + onSidecarsReady?: () => void; }, runtimeDeps: GatewayPostAttachRuntimeDeps = defaultGatewayPostAttachRuntimeDeps, ) { - runtimeDeps.logGatewayStartup({ + await runtimeDeps.logGatewayStartup({ cfg: params.cfgAtStart, bindHost: params.bindHost, bindHosts: params.bindHosts, @@ -359,19 +362,21 @@ export async function startGatewayPostAttachRuntime( const tailscaleCleanupPromise = params.minimalTestGateway ? 
Promise.resolve(null) - : Promise.resolve( - runtimeDeps.startGatewayTailscaleExposure({ - tailscaleMode: params.tailscaleMode, - resetOnExit: params.resetOnExit, - port: params.port, - controlUiBasePath: params.controlUiBasePath, - logTailscale: params.logTailscale, - }), - ); + : params.tailscaleMode === "off" && !params.resetOnExit + ? Promise.resolve(null) + : Promise.resolve( + runtimeDeps.startGatewayTailscaleExposure({ + tailscaleMode: params.tailscaleMode, + resetOnExit: params.resetOnExit, + port: params.port, + controlUiBasePath: params.controlUiBasePath, + logTailscale: params.logTailscale, + }), + ); const sidecarsPromise = params.minimalTestGateway ? Promise.resolve({ pluginServices: null }) - : (async () => { + : new Promise((resolve) => setImmediate(resolve)).then(async () => { params.log.info("starting channels and sidecars..."); const result = await runtimeDeps.startGatewaySidecars({ cfg: params.gatewayPluginConfigAtStart, @@ -386,25 +391,35 @@ export async function startGatewayPostAttachRuntime( for (const method of STARTUP_UNAVAILABLE_GATEWAY_METHODS) { params.unavailableGatewayMethods.delete(method); } + params.onPluginServices?.(result.pluginServices); + params.onSidecarsReady?.(); return result; - })(); + }); - const [stopGatewayUpdateCheck, tailscaleCleanup, { pluginServices }] = await Promise.all([ + void sidecarsPromise + .then(async () => { + if (params.minimalTestGateway) { + return; + } + const hookRunner = await runtimeDeps.getGlobalHookRunner(); + if (hookRunner?.hasHooks("gateway_start")) { + void hookRunner + .runGatewayStart({ port: params.port }, { port: params.port }) + .catch((err) => { + params.log.warn(`gateway_start hook failed: ${String(err)}`); + }); + } + }) + .catch((err) => { + params.log.warn(`gateway sidecars failed to start: ${String(err)}`); + }); + + const [stopGatewayUpdateCheck, tailscaleCleanup] = await Promise.all([ stopGatewayUpdateCheckPromise, tailscaleCleanupPromise, - sidecarsPromise, ]); - if 
(!params.minimalTestGateway) { - const hookRunner = await runtimeDeps.getGlobalHookRunner(); - if (hookRunner?.hasHooks("gateway_start")) { - void hookRunner.runGatewayStart({ port: params.port }, { port: params.port }).catch((err) => { - params.log.warn(`gateway_start hook failed: ${String(err)}`); - }); - } - } - - return { stopGatewayUpdateCheck, tailscaleCleanup, pluginServices }; + return { stopGatewayUpdateCheck, tailscaleCleanup, pluginServices: null }; } export const __testing = { diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 4e3cc59eb7b..39e4201dbab 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -455,6 +455,7 @@ export async function startGatewayServer( throw new Error(gatewayTls.error ?? "gateway tls: failed to enable"); } const serverStartedAt = Date.now(); + let startupSidecarsReady = minimalTestGateway; const channelManager = createChannelManager({ loadConfig: () => applyPluginAutoEnable({ @@ -468,6 +469,7 @@ export async function startGatewayServer( const getReadiness = createReadinessChecker({ channelManager, startedAt: serverStartedAt, + getStartupPending: () => !startupSidecarsReady, }); log.info("starting HTTP server..."); const { @@ -821,11 +823,17 @@ export async function startGatewayServer( logHooks, logChannels, unavailableGatewayMethods, + onPluginServices: (pluginServices) => { + runtimeState.pluginServices = pluginServices; + }, + onSidecarsReady: () => { + startupSidecarsReady = true; + }, }), )); startupTrace.mark("ready"); - // Keep scheduled work inert until post-attach sidecars finish. + // HTTP is live before sidecars finish; /readyz stays red until the startup sidecars settle. 
const activated = activateGatewayScheduledServices({ minimalTestGateway, cfgAtStart, diff --git a/src/gateway/server/readiness.test.ts b/src/gateway/server/readiness.test.ts index f41373dab7e..f875eb8dfb7 100644 --- a/src/gateway/server/readiness.test.ts +++ b/src/gateway/server/readiness.test.ts @@ -60,6 +60,7 @@ function withReadinessClock(run: () => void) { function createReadinessHarness(params: { startedAgoMs: number; accounts: Record>; + getStartupPending?: () => boolean; cacheTtlMs?: number; }) { const startedAt = Date.now() - params.startedAgoMs; @@ -69,6 +70,7 @@ function createReadinessHarness(params: { readiness: createReadinessChecker({ channelManager: manager, startedAt, + getStartupPending: params.getStartupPending, cacheTtlMs: params.cacheTtlMs, }), }; @@ -85,6 +87,21 @@ describe("createReadinessChecker", () => { }); }); + it("keeps readiness red while startup sidecars are pending", () => { + withReadinessClock(() => { + const { readiness } = createReadinessHarness({ + startedAgoMs: 5 * 60_000, + accounts: {}, + getStartupPending: () => true, + }); + expect(readiness()).toEqual({ + ready: false, + failing: ["startup-sidecars"], + uptimeMs: 300_000, + }); + }); + }); + it("ignores disabled and unconfigured channels", () => { withReadinessClock(() => { const { readiness } = createReadinessHarness({ diff --git a/src/gateway/server/readiness.ts b/src/gateway/server/readiness.ts index 0df380d3e8f..312177a0f6f 100644 --- a/src/gateway/server/readiness.ts +++ b/src/gateway/server/readiness.ts @@ -35,6 +35,7 @@ function shouldIgnoreReadinessFailure( export function createReadinessChecker(deps: { channelManager: ChannelManager; startedAt: number; + getStartupPending?: () => boolean; cacheTtlMs?: number; }): ReadinessChecker { const { channelManager, startedAt } = deps; @@ -51,6 +52,9 @@ export function createReadinessChecker(deps: { const snapshot = channelManager.getRuntimeSnapshot(); const failing: string[] = []; + if (deps.getStartupPending?.()) { + 
failing.push("startup-sidecars"); + } for (const [channelId, accounts] of Object.entries(snapshot.channelAccounts)) { if (!accounts) { diff --git a/src/hooks/configured.ts b/src/hooks/configured.ts index 7a43658cd76..fcac07ac59f 100644 --- a/src/hooks/configured.ts +++ b/src/hooks/configured.ts @@ -2,11 +2,15 @@ import type { HookConfig, HookInstallRecord } from "../config/types.hooks.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { getLegacyInternalHookHandlers } from "./legacy-config.js"; +function hasEnabledFlag(entry: HookConfig | undefined): boolean { + return entry?.enabled !== false; +} + function hasEnabledEntry(entries: Record | undefined): boolean { if (!entries) { return false; } - return Object.values(entries).some((entry) => entry?.enabled !== false); + return Object.values(entries).some(hasEnabledFlag); } function hasConfiguredInstalls(installs: Record | undefined): boolean { @@ -32,3 +36,41 @@ export function hasConfiguredInternalHooks(config: OpenClawConfig): boolean { } return getLegacyInternalHookHandlers(config).length > 0; } + +export function resolveConfiguredInternalHookNames(config: OpenClawConfig): Set | null { + const internal = config.hooks?.internal; + if (!internal || internal.enabled === false) { + return new Set(); + } + if (internal.enabled === true) { + return null; + } + + const names = new Set(); + for (const [name, entry] of Object.entries(internal.entries ?? {})) { + const trimmed = name.trim(); + if (trimmed && hasEnabledFlag(entry)) { + names.add(trimmed); + } + } + for (const [installId, install] of Object.entries(internal.installs ?? {})) { + const hookNames = install.hooks ?? []; + if (hookNames.length === 0 && installId.trim()) { + return null; + } + for (const hookName of hookNames) { + const trimmedHookName = hookName.trim(); + if (trimmedHookName) { + names.add(trimmedHookName); + } + } + } + + if ((internal.load?.extraDirs ?? 
[]).some((dir) => dir.trim().length > 0)) { + return null; + } + if (getLegacyInternalHookHandlers(config).length > 0) { + return null; + } + return names; +} diff --git a/src/hooks/loader.test.ts b/src/hooks/loader.test.ts index 4ae613979b2..a8c313a3580 100644 --- a/src/hooks/loader.test.ts +++ b/src/hooks/loader.test.ts @@ -7,7 +7,7 @@ import { setLoggerOverride } from "../logging/logger.js"; import { loggingState } from "../logging/state.js"; import { stripAnsi } from "../terminal/ansi.js"; import { captureEnv } from "../test-utils/env.js"; -import { hasConfiguredInternalHooks } from "./configured.js"; +import { hasConfiguredInternalHooks, resolveConfiguredInternalHookNames } from "./configured.js"; import { clearInternalHooks, getRegisteredEventKeys, @@ -151,6 +151,21 @@ describe("loader", () => { hooks: { internal: { load: { extraDirs: ["/tmp/hooks"] } } }, } satisfies OpenClawConfig), ).toBe(true); + expect( + resolveConfiguredInternalHookNames({ + hooks: { internal: { entries: { "session-memory": { enabled: true } } } }, + } satisfies OpenClawConfig), + ).toEqual(new Set(["session-memory"])); + expect( + resolveConfiguredInternalHookNames({ + hooks: { internal: { enabled: true } }, + } satisfies OpenClawConfig), + ).toBeNull(); + expect( + resolveConfiguredInternalHookNames({ + hooks: { internal: { installs: { pack: { source: "path" } } } }, + } satisfies OpenClawConfig), + ).toBeNull(); }); const createLegacyHandlerConfig = () => @@ -203,6 +218,31 @@ describe("loader", () => { } }); + it("loads only explicitly configured discovered hooks", async () => { + const hooksDir = path.join(tmpDir, "managed-hooks"); + await writeDiscoveredHook({ sourceDir: hooksDir, hookName: "keep-hook" }); + await writeDiscoveredHook({ sourceDir: hooksDir, hookName: "skip-hook" }); + + const count = await loadInternalHooks( + { + hooks: { + internal: { + entries: { + "keep-hook": { enabled: true }, + }, + }, + }, + } satisfies OpenClawConfig, + tmpDir, + { managedHooksDir: 
hooksDir, bundledHooksDir: "/nonexistent/bundled/hooks" }, + ); + + expect(count).toBe(1); + const event = createInternalHookEvent("command", "new", "test-session"); + await triggerInternalHook(event); + expect(event.messages).toEqual(["keep-hook"]); + }); + it("should load a handler from a module", async () => { // Create a test handler module const handlerCode = ` diff --git a/src/hooks/loader.ts b/src/hooks/loader.ts index e7153918784..bf7384c35b9 100644 --- a/src/hooks/loader.ts +++ b/src/hooks/loader.ts @@ -14,7 +14,7 @@ import { createSubsystemLogger } from "../logging/subsystem.js"; import { resolveGlobalSingleton } from "../shared/global-singleton.js"; import { sanitizeForLog } from "../terminal/ansi.js"; import { shouldIncludeHook } from "./config.js"; -import { hasConfiguredInternalHooks } from "./configured.js"; +import { hasConfiguredInternalHooks, resolveConfiguredInternalHookNames } from "./configured.js"; import { buildImportUrl } from "./import-url.js"; import type { InternalHookHandler } from "./internal-hooks.js"; import { registerInternalHook, unregisterInternalHook } from "./internal-hooks.js"; @@ -92,6 +92,7 @@ export async function loadInternalHooks( } let loadedCount = 0; + const configuredNames = resolveConfiguredInternalHookNames(cfg); // 1. Load hooks from directories (new system) try { @@ -102,7 +103,12 @@ export async function loadInternalHooks( }); // Filter by eligibility - const eligible = hookEntries.filter((entry) => shouldIncludeHook({ entry, config: cfg })); + const eligible = hookEntries.filter((entry) => { + if (configuredNames && !configuredNames.has(entry.hook.name)) { + return false; + } + return shouldIncludeHook({ entry, config: cfg }); + }); for (const entry of eligible) { try {