Files
openclaw/src/gateway/gateway-cli-backend.live.test.ts
Alex Knight c438dadc5c Fix Claude CLI runtime migration for gateway turns (#82546)
Summary:
- The PR adds model-scoped `claude-cli` runtime policy to Anthropic CLI migration/default backfill, updates the gateway CLI live-smoke config, tests, and changelog.
- Reproducibility: yes. source inspection gives a high-confidence reproduction path: current main writes `clau ... del/provider-scoped runtime policy. I did not run a live Telegram/Dashboard repro in this read-only review.

Automerge notes:
- No ClawSweeper repair was needed after automerge opt-in.

Validation:
- ClawSweeper review passed for head 62cf54484f.
- Required merge gates passed before the squash merge.

Prepared head SHA: 62cf54484f
Review: https://github.com/openclaw/openclaw/pull/82546#issuecomment-4466676206

Co-authored-by: Alex Knight <15041791+amknight@users.noreply.github.com>
2026-05-16 11:54:48 +00:00

639 lines
23 KiB
TypeScript

import { randomBytes, randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { resolveCliBackendConfig, resolveCliBackendLiveTest } from "../agents/cli-backends.js";
import { isLiveTestEnabled } from "../agents/live-test-helpers.js";
import { shouldSkipLiveProviderDrift } from "../agents/live-test-provider-drift.js";
import { parseModelRef } from "../agents/model-selection.js";
import { clearRuntimeConfigSnapshot, type OpenClawConfig } from "../config/config.js";
import { isTruthyEnvValue } from "../infra/env.js";
import {
applyCliBackendLiveEnv,
createBootstrapWorkspace,
ensurePairedTestGatewayClientIdentity,
getFreeGatewayPort,
matchesCliBackendReply,
parseImageMode,
resolveCliModelSwitchProbeTarget,
resolveCliBackendLiveArgs,
resolveCliBackendLiveModelSelection,
parseJsonStringArray,
restoreCliBackendLiveEnv,
shouldRunCliImageProbe,
shouldRunCliModelSwitchProbe,
shouldRunCliMcpProbe,
snapshotCliBackendLiveEnv,
type SystemPromptReport,
withClaudeMcpConfigOverrides,
connectTestGatewayClient,
} from "./gateway-cli-backend.live-helpers.js";
import {
verifyCliBackendImageProbe,
verifyCliCronMcpLoopbackPreflight,
verifyCliCronMcpProbe,
} from "./gateway-cli-backend.live-probe-helpers.js";
import { startGatewayServer } from "./server.js";
import { extractPayloadText } from "./test-helpers.agent-results.js";
// Feature switches for this live suite. Each is read once, at module load.
// LIVE and CLI_LIVE must both be set for the suite to run (see describeLive below).
const LIVE = isLiveTestEnabled();
const CLI_LIVE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CLI_BACKEND);
// Enables the follow-up "resume" request; it only runs when no model-switch probe target is active.
const CLI_RESUME = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE);
// Gates the stderr step logging done by logCliBackendLiveStep.
const CLI_DEBUG = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CLI_BACKEND_DEBUG);
// When set and the provider is "codex-cli", the cron MCP probe is skipped after the loopback preflight.
const CLI_CI_SAFE_CODEX_CONFIG = isTruthyEnvValue(
process.env.OPENCLAW_LIVE_CLI_BACKEND_USE_CI_SAFE_CODEX_CONFIG,
);
// When set, a temporary plugin exposing a no-argument tool is generated and loaded by the gateway.
const CLI_MCP_SCHEMA_PROBE = isTruthyEnvValue(
process.env.OPENCLAW_LIVE_CLI_BACKEND_MCP_SCHEMA_PROBE,
);
// Run the suite only when both live switches are on; otherwise register it as skipped.
const describeLive = LIVE && CLI_LIVE ? describe : describe.skip;
// Identifiers shared by the generated schema-probe plugin and the MCP loopback preflight.
const MCP_SCHEMA_PROBE_PLUGIN_ID = "mcp-schema-probe";
const MCP_SCHEMA_PROBE_TOOL_NAME = "mcp_schema_probe_no_args";
const DEFAULT_PROVIDER = "claude-cli";
// Prefer the default model ref registered for the default provider; fall back to a fixed ref.
const DEFAULT_MODEL =
resolveCliBackendLiveTest(DEFAULT_PROVIDER)?.defaultModelRef ?? "claude-cli/claude-sonnet-4-6";
// The cron/MCP live probe now tolerates more cancelled tool-call retries in CI,
// so the outer test budget needs enough headroom to finish those retries.
// 20 minutes; passed to vitest as the timeout of the single `it` in this file.
const CLI_BACKEND_LIVE_TIMEOUT_MS = 20 * 60_000;
// Per-request client timeout (default 15 minutes), overridable through the environment.
// parsePositiveIntegerEnv is declared further down; function declarations are hoisted,
// so calling it here at module load is valid.
const CLI_BACKEND_REQUEST_TIMEOUT_MS = parsePositiveIntegerEnv(
"OPENCLAW_LIVE_CLI_BACKEND_REQUEST_TIMEOUT_MS",
15 * 60_000,
);
// Timeout sent inside each agent request: the client timeout in seconds minus 10,
// clamped to at least 1 second.
const CLI_BACKEND_AGENT_TIMEOUT_SECONDS = Math.max(
1,
Math.ceil(CLI_BACKEND_REQUEST_TIMEOUT_MS / 1000) - 10,
);
/**
 * Reads an environment variable that must hold a positive integer.
 *
 * @param name - Name of the variable to look up in `process.env`.
 * @param fallback - Value returned when the variable is unset or blank after trimming.
 * @returns The parsed integer, or `fallback` when nothing usable is set.
 * @throws Error when the variable is set but is not a positive safe integer.
 */
function parsePositiveIntegerEnv(name: string, fallback: number): number {
  const text = process.env[name]?.trim();
  if (text === undefined || text === "") {
    return fallback;
  }
  const parsed = Number(text);
  const isPositiveInteger = Number.isSafeInteger(parsed) && parsed > 0;
  if (isPositiveInteger) {
    return parsed;
  }
  throw new Error(`${name} must be a positive integer. Got: ${JSON.stringify(text)}`);
}
/**
 * Writes a tagged progress line to stderr when debug logging is enabled.
 *
 * @param step - Short step label printed after the `[gateway-cli-live]` tag.
 * @param details - Optional context; appended as JSON when it has at least one key.
 */
function logCliBackendLiveStep(step: string, details?: Record<string, unknown>): void {
  if (CLI_DEBUG) {
    const hasDetails = details ? Object.keys(details).length > 0 : false;
    const suffix = hasDetails ? ` ${JSON.stringify(details)}` : "";
    console.error(`[gateway-cli-live] ${step}${suffix}`);
  }
}
/**
 * Resolves after roughly `ms` milliseconds, using `setTimeout`.
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Builds the `openai` provider entry used when the live test targets `codex-cli`.
 *
 * The model id is taken from `modelKey` (parsed with the default provider as the
 * fallback provider) and falls back to "gpt-5.5" when no model part is present.
 *
 * @param modelKey - Model reference to extract the model id from.
 * @returns A provider config containing a single model entry.
 */
function openAiProviderConfigForCodexCli(
  modelKey: string,
): NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>["openai"] {
  const requestedModel = parseModelRef(modelKey, DEFAULT_PROVIDER)?.model?.trim();
  const modelId = requestedModel || "gpt-5.5";
  // Provider timeout mirrors the client request timeout, rounded up to whole seconds.
  const timeoutSeconds = Math.ceil(CLI_BACKEND_REQUEST_TIMEOUT_MS / 1000);
  return {
    api: "openai-responses",
    baseUrl: "https://api.openai.com/v1",
    models: [
      {
        contextWindow: 1_047_576,
        cost: { cacheRead: 0, cacheWrite: 0, input: 0, output: 0 },
        id: modelId,
        input: ["text"],
        maxTokens: 32_768,
        name: modelId,
        reasoning: true,
      },
    ],
    timeoutSeconds,
  };
}
/**
 * Reports whether an error looks like a provider capacity failure.
 *
 * The check is textual and case-insensitive: the error text must contain "529"
 * and at least one of "overloaded" or "capacity".
 *
 * @param error - Any thrown value; `Error` instances contribute `name: message`.
 */
function isProviderCapacityError(error: unknown): boolean {
  const described = error instanceof Error ? `${error.name}: ${error.message}` : String(error);
  const haystack = described.toLowerCase();
  if (!haystack.includes("529")) {
    return false;
  }
  return ["overloaded", "capacity"].some((needle) => haystack.includes(needle));
}
/**
 * Runs a live request, retrying provider capacity errors and converting known
 * provider-side blockers into a skip.
 *
 * Only "claude-cli" gets retries (3 attempts in total); every other provider gets one attempt.
 * Between retries the wait grows linearly: 15s, then 30s.
 *
 * @param providerId - Provider the request is sent to.
 * @param label - Human-readable request name used in skip and retry messages.
 * @param request - Factory that performs the request; invoked once per attempt.
 * @returns The request result, or `undefined` when the request was skipped.
 * @throws The original error when it is neither retryable nor skippable.
 */
async function requestWithProviderCapacityRetry<T>(
  providerId: string,
  label: string,
  request: () => Promise<T>,
): Promise<T | undefined> {
  const isClaudeCli = providerId === "claude-cli";
  const maxAttempts = isClaudeCli ? 3 : 1;
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      return await request();
    } catch (error) {
      const overloaded = isProviderCapacityError(error);
      if (overloaded && attempt < maxAttempts) {
        // Capacity error with attempts left: back off and try again.
        logCliBackendLiveStep("provider-capacity-retry", { label, attempt });
        await sleep(15_000 * attempt);
        continue;
      }
      // Terminal failure: decide between skipping and rethrowing.
      const blockedByDrift = shouldSkipLiveProviderDrift({
        error,
        allowAuth: true,
        allowBilling: true,
      });
      if (blockedByDrift) {
        console.warn(`SKIP: ${label} skipped because provider account/auth drift blocked it.`);
        return undefined;
      }
      if (isClaudeCli && overloaded) {
        console.warn(`SKIP: ${label} skipped because Claude API stayed overloaded.`);
        return undefined;
      }
      throw error;
    }
  }
  return undefined;
}
/**
 * Writes a minimal plugin into `tempDir` that registers one no-argument tool.
 *
 * Two files are created under `<tempDir>/<plugin id>`: the `openclaw.plugin.json`
 * manifest and an `index.cjs` entry module.
 *
 * @param tempDir - Directory that will contain the plugin folder.
 * @returns Absolute path of the generated `index.cjs` entry file.
 */
async function createMcpSchemaProbePlugin(tempDir: string): Promise<string> {
  const pluginDir = path.join(tempDir, MCP_SCHEMA_PROBE_PLUGIN_ID);
  await fs.mkdir(pluginDir, { recursive: true });

  const manifest = {
    id: MCP_SCHEMA_PROBE_PLUGIN_ID,
    name: "MCP Schema Probe",
    description: "Live test plugin for no-argument MCP tool schemas",
    configSchema: { type: "object", properties: {} },
  };
  const manifestPath = path.join(pluginDir, "openclaw.plugin.json");
  await fs.writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);

  // The tool deliberately declares `parameters` with no `properties` key.
  const entrySource = `module.exports = {
id: "${MCP_SCHEMA_PROBE_PLUGIN_ID}",
name: "MCP Schema Probe",
register(api) {
api.registerTool({
name: "${MCP_SCHEMA_PROBE_TOOL_NAME}",
description: "Live test no-argument tool for MCP schema normalization",
parameters: { type: "object" },
async execute() {
return { content: [{ type: "text", text: "schema probe ok" }] };
},
});
},
};
`;
  const entryPath = path.join(pluginDir, "index.cjs");
  await fs.writeFile(entryPath, entrySource);
  return entryPath;
}
/**
 * Live end-to-end smoke test for the gateway's CLI backend.
 *
 * Flow: build a temporary config and state dir, start a loopback gateway, connect a
 * paired client, send an agent request, then optionally run the model-switch or resume
 * follow-up, the image probe, and the cron/MCP probes.
 *
 * Cleanup is registered step by step as resources are acquired and always runs in
 * reverse order, so a failure during setup (for example a missing CLI command or a
 * client that cannot connect) no longer leaks the server, the temp dir, or env changes.
 */
describeLive("gateway live (cli backend)", () => {
  it(
    "runs the agent pipeline against the local CLI backend",
    async () => {
      const preservedEnv = new Set(
        parseJsonStringArray(
          "OPENCLAW_LIVE_CLI_BACKEND_PRESERVE_ENV",
          process.env.OPENCLAW_LIVE_CLI_BACKEND_PRESERVE_ENV,
        ) ?? [],
      );
      const previousEnv = snapshotCliBackendLiveEnv();

      // Cleanup steps run last-registered-first. Env restoration is registered first so
      // it runs last, after the client, server, and temp dir have been torn down.
      const cleanupSteps: Array<() => unknown> = [() => restoreCliBackendLiveEnv(previousEnv)];
      const runCleanupSteps = async (): Promise<void> => {
        logCliBackendLiveStep("cleanup:start");
        clearRuntimeConfigSnapshot();
        const failures: unknown[] = [];
        for (const step of [...cleanupSteps].reverse()) {
          try {
            await step();
          } catch (error) {
            // Keep going: one failed teardown must not prevent the remaining ones.
            failures.push(error);
          }
        }
        if (failures.length > 0) {
          for (const extra of failures.slice(1)) {
            console.warn(`[gateway-cli-live] additional cleanup failure: ${String(extra)}`);
          }
          throw failures[0];
        }
        logCliBackendLiveStep("cleanup:done");
      };

      try {
        clearRuntimeConfigSnapshot();
        applyCliBackendLiveEnv(preservedEnv);
        const token = `test-${randomUUID()}`;
        process.env.OPENCLAW_GATEWAY_TOKEN = token;
        const port = await getFreeGatewayPort();
        logCliBackendLiveStep("env-ready", { port });

        // Resolve which provider/model to exercise and which probes apply to it.
        const rawModel = process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL ?? DEFAULT_MODEL;
        const initialParsed = parseModelRef(rawModel, "claude-cli");
        const initialProviderId = initialParsed?.provider ?? "";
        const initialModelKey = initialParsed
          ? `${initialProviderId}/${initialParsed.model}`
          : rawModel;
        const initialModelSwitchTarget = resolveCliModelSwitchProbeTarget(
          initialProviderId,
          initialModelKey,
        );
        const modelSelection = resolveCliBackendLiveModelSelection({
          rawModel,
          defaultProvider: "claude-cli",
          modelSwitchTarget: initialModelSwitchTarget,
        });
        const providerId = modelSelection.providerId;
        const modelKey = modelSelection.cliModelKey;
        const configModelKey = modelSelection.configModelKey;
        const backendResolved = resolveCliBackendConfig(providerId);
        const enableCliImageProbe = shouldRunCliImageProbe(providerId);
        const enableCliMcpProbe = shouldRunCliMcpProbe(providerId);
        const enableCliModelSwitchProbe = shouldRunCliModelSwitchProbe(providerId, modelKey);
        const modelSwitchTarget = enableCliModelSwitchProbe
          ? modelSelection.configModelSwitchTarget
          : undefined;
        logCliBackendLiveStep("model-selected", {
          providerId,
          modelKey,
          configModelKey,
          enableCliImageProbe,
          enableCliMcpProbe,
          enableCliModelSwitchProbe,
          modelSwitchTarget,
        });

        // Resolve the CLI invocation: env overrides win over the provider defaults.
        const providerDefaults = backendResolved?.config;
        const cliCommand =
          process.env.OPENCLAW_LIVE_CLI_BACKEND_COMMAND ?? providerDefaults?.command;
        if (!cliCommand) {
          throw new Error(
            `OPENCLAW_LIVE_CLI_BACKEND_COMMAND is required for provider "${providerId}".`,
          );
        }
        const { args: baseCliArgs, resumeArgs: baseCliResumeArgs } = resolveCliBackendLiveArgs({
          providerId,
          defaultArgs: providerDefaults?.args,
          defaultResumeArgs: providerDefaults?.resumeArgs,
        });
        const cliClearEnv =
          parseJsonStringArray(
            "OPENCLAW_LIVE_CLI_BACKEND_CLEAR_ENV",
            process.env.OPENCLAW_LIVE_CLI_BACKEND_CLEAR_ENV,
          ) ??
          providerDefaults?.clearEnv ??
          [];
        // Variables explicitly preserved must not be cleared for the CLI process.
        const filteredCliClearEnv = cliClearEnv.filter((name) => !preservedEnv.has(name));
        const preservedCliEnv = Object.fromEntries(
          [...preservedEnv]
            .map((name) => [name, process.env[name]])
            .filter((entry): entry is [string, string] => typeof entry[1] === "string"),
        );
        const cliImageArg =
          process.env.OPENCLAW_LIVE_CLI_BACKEND_IMAGE_ARG?.trim() || providerDefaults?.imageArg;
        const cliImageMode =
          parseImageMode(process.env.OPENCLAW_LIVE_CLI_BACKEND_IMAGE_MODE) ??
          providerDefaults?.imageMode;
        if (cliImageMode && !cliImageArg) {
          throw new Error(
            "OPENCLAW_LIVE_CLI_BACKEND_IMAGE_MODE requires OPENCLAW_LIVE_CLI_BACKEND_IMAGE_ARG.",
          );
        }

        // Scratch directory holding state, config, and generated fixtures for this run.
        const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-live-cli-"));
        cleanupSteps.push(() =>
          fs.rm(tempDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }),
        );
        const stateDir = path.join(tempDir, "state");
        await fs.mkdir(stateDir, { recursive: true });
        const schemaProbePluginPath = CLI_MCP_SCHEMA_PROBE
          ? await createMcpSchemaProbePlugin(tempDir)
          : undefined;
        const useMinimalToolsProfile = providerId === "codex-cli" && !schemaProbePluginPath;
        process.env.OPENCLAW_STATE_DIR = stateDir;
        const bundleMcp = backendResolved?.bundleMcp === true;
        const bootstrapWorkspace = await createBootstrapWorkspace(tempDir);
        // MCP config overriding is on unless the env var is exactly "0".
        const disableMcpConfig = process.env.OPENCLAW_LIVE_CLI_BACKEND_DISABLE_MCP_CONFIG !== "0";
        let cliArgs = baseCliArgs;
        if (
          bundleMcp &&
          disableMcpConfig &&
          backendResolved?.bundleMcpMode === "claude-config-file"
        ) {
          // Point the CLI at an empty MCP server list written into the scratch directory.
          const mcpConfigPath = path.join(tempDir, "claude-mcp.json");
          await fs.writeFile(mcpConfigPath, `${JSON.stringify({ mcpServers: {} }, null, 2)}\n`);
          cliArgs = withClaudeMcpConfigOverrides(baseCliArgs, mcpConfigPath);
        }

        // `cfg` starts empty, so the `...cfg.*` spreads below are currently no-ops.
        const cfg: OpenClawConfig = {};
        const cfgWithCliBackends = cfg as OpenClawConfig & {
          agents?: {
            defaults?: {
              cliBackends?: Record<string, Record<string, unknown>>;
            };
          };
        };
        const existingBackends = cfgWithCliBackends.agents?.defaults?.cliBackends ?? {};
        const nextCfg = {
          ...cfg,
          ...(schemaProbePluginPath
            ? {
                plugins: {
                  ...cfg.plugins,
                  load: {
                    ...cfg.plugins?.load,
                    paths: [...(cfg.plugins?.load?.paths ?? []), schemaProbePluginPath],
                  },
                  entries: {
                    ...cfg.plugins?.entries,
                    [MCP_SCHEMA_PROBE_PLUGIN_ID]: { enabled: true },
                  },
                },
              }
            : {}),
          gateway: {
            mode: "local",
            ...cfg.gateway,
            port,
            auth: { mode: "token", token },
          },
          models:
            providerId === "codex-cli"
              ? {
                  ...cfg.models,
                  providers: {
                    ...cfg.models?.providers,
                    openai: {
                      ...openAiProviderConfigForCodexCli(configModelKey),
                      ...cfg.models?.providers?.openai,
                    },
                  },
                }
              : cfg.models,
          ...(useMinimalToolsProfile
            ? {
                tools: {
                  ...cfg.tools,
                  profile: "minimal" as const,
                },
              }
            : {}),
          agents: {
            ...cfg.agents,
            defaults: {
              ...cfg.agents?.defaults,
              ...(bootstrapWorkspace ? { workspace: bootstrapWorkspace.workspaceRootDir } : {}),
              model: { primary: configModelKey },
              // Runtime policy is set per model (primary and switch target) and as the default.
              models: {
                [configModelKey]: { agentRuntime: modelSelection.agentRuntime },
                ...(modelSwitchTarget
                  ? { [modelSwitchTarget]: { agentRuntime: modelSelection.agentRuntime } }
                  : {}),
              },
              agentRuntime: modelSelection.agentRuntime,
              cliBackends: {
                ...existingBackends,
                [providerId]: {
                  command: cliCommand,
                  args: cliArgs,
                  resumeArgs: baseCliResumeArgs,
                  clearEnv: filteredCliClearEnv.length > 0 ? filteredCliClearEnv : undefined,
                  env: Object.keys(preservedCliEnv).length > 0 ? preservedCliEnv : undefined,
                  systemPromptWhen: providerDefaults?.systemPromptWhen ?? "never",
                  ...(cliImageArg
                    ? {
                        imageArg: cliImageArg,
                        imageMode: cliImageMode,
                        imagePathScope: providerDefaults?.imagePathScope,
                      }
                    : {}),
                },
              },
              sandbox: { mode: "off" },
            },
          },
        };
        const tempConfigPath = path.join(tempDir, "openclaw.json");
        await fs.writeFile(tempConfigPath, `${JSON.stringify(nextCfg, null, 2)}\n`);
        process.env.OPENCLAW_CONFIG_PATH = tempConfigPath;
        const deviceIdentity = await ensurePairedTestGatewayClientIdentity();
        logCliBackendLiveStep("config-written", {
          tempConfigPath,
          stateDir,
          cliCommand,
          cliArgs,
        });

        const server = await startGatewayServer(port, {
          bind: "loopback",
          auth: { mode: "token", token },
          controlUiEnabled: false,
        });
        cleanupSteps.push(() => server.close());
        logCliBackendLiveStep("server-started");
        const client = await connectTestGatewayClient({
          url: `ws://127.0.0.1:${port}`,
          token,
          deviceIdentity,
        });
        cleanupSteps.push(() => client.stopAndWait());
        logCliBackendLiveStep("client-connected");

        // --- Primary agent request -------------------------------------------------
        const sessionKey = "agent:dev:live-cli-backend";
        const nonce = randomBytes(3).toString("hex").toUpperCase();
        const memoryNonce = randomBytes(3).toString("hex").toUpperCase();
        const memoryToken = `CLI-MEM-${memoryNonce}`;
        logCliBackendLiveStep("agent-request:start", { sessionKey, nonce });
        const payload = await requestWithProviderCapacityRetry(providerId, "agent request", () =>
          client.request(
            "agent",
            {
              sessionKey,
              idempotencyKey: `idem-${randomUUID()}`,
              message:
                providerId === "codex-cli"
                  ? `Do not inspect files or run tools. Reply with exactly: CLI-BACKEND-${nonce}.`
                  : enableCliModelSwitchProbe
                    ? `Please include the token CLI-BACKEND-${nonce} in your reply.` +
                      ` Also remember this session note for later: ${memoryToken}.` +
                      " Do not include the note in your reply."
                    : `Please include the token CLI-BACKEND-${nonce} in your reply.`,
              deliver: false,
              timeout: CLI_BACKEND_AGENT_TIMEOUT_SECONDS,
            },
            { expectFinal: true, timeoutMs: CLI_BACKEND_REQUEST_TIMEOUT_MS },
          ),
        );
        if (!payload) {
          // The retry helper already reported why the request was skipped.
          return;
        }
        if (providerId === "codex-cli" && payload?.status === "timeout") {
          console.warn(
            "SKIP: Codex CLI backend live smoke timed out waiting for a model response.",
          );
          return;
        }
        if (payload?.status !== "ok") {
          throw new Error(`agent status=${String(payload?.status)}`);
        }
        logCliBackendLiveStep("agent-request:done", { status: payload?.status });
        const text = extractPayloadText(payload?.result);
        if (providerId === "codex-cli") {
          expect(text).toContain(`CLI-BACKEND-${nonce}`);
        } else {
          const resultWithMeta = payload?.result as {
            meta?: { systemPromptReport?: SystemPromptReport };
          };
          if (enableCliModelSwitchProbe) {
            // With the model-switch probe active, only a non-empty reply is required here.
            expect(text.trim().length).toBeGreaterThan(0);
          } else {
            expect(text).toContain(`CLI-BACKEND-${nonce}`);
          }
          const injectedFileNames =
            resultWithMeta.meta?.systemPromptReport?.injectedWorkspaceFiles?.map(
              (entry) => entry.name,
            ) ?? [];
          for (const expectedFile of bootstrapWorkspace?.expectedInjectedFiles ?? []) {
            expect(injectedFileNames).toContain(expectedFile);
          }
        }

        // --- Follow-up: model switch, or resume when no switch target applies --------
        if (modelSwitchTarget) {
          const switchNonce = randomBytes(3).toString("hex").toUpperCase();
          logCliBackendLiveStep("agent-switch:start", {
            sessionKey,
            fromModel: modelKey,
            toModel: modelSwitchTarget,
            switchNonce,
            memoryToken,
          });
          const patchPayload = await client.request("sessions.patch", {
            key: sessionKey,
            model: modelSwitchTarget,
          });
          // NOTE(review): this only checks that an `ok` key is present, not that it is
          // truthy — confirm against the `sessions.patch` response contract.
          if (!patchPayload || typeof patchPayload !== "object" || !("ok" in patchPayload)) {
            throw new Error(
              `sessions.patch failed for model switch: ${JSON.stringify(patchPayload)}`,
            );
          }
          const switchPayload = await requestWithProviderCapacityRetry(
            providerId,
            "agent model-switch request",
            () =>
              client.request(
                "agent",
                {
                  sessionKey,
                  idempotencyKey: `idem-${randomUUID()}`,
                  message:
                    "We just switched from Claude Sonnet to Claude Opus in the same session. " +
                    `What session note did I ask you to remember earlier? ` +
                    `Reply with exactly: CLI backend SWITCH OK ${switchNonce} <remembered-note>.`,
                  deliver: false,
                  timeout: CLI_BACKEND_AGENT_TIMEOUT_SECONDS,
                },
                { expectFinal: true, timeoutMs: CLI_BACKEND_REQUEST_TIMEOUT_MS },
              ),
          );
          if (!switchPayload) {
            return;
          }
          if (switchPayload?.status !== "ok") {
            throw new Error(`switch status=${String(switchPayload?.status)}`);
          }
          logCliBackendLiveStep("agent-switch:done", { status: switchPayload?.status });
          const switchText = extractPayloadText(switchPayload?.result);
          expect(
            matchesCliBackendReply(
              switchText,
              `CLI backend SWITCH OK ${switchNonce} ${memoryToken}.`,
            ),
          ).toBe(true);
        } else if (CLI_RESUME) {
          const resumeNonce = randomBytes(3).toString("hex").toUpperCase();
          logCliBackendLiveStep("agent-resume:start", { sessionKey, resumeNonce });
          const resumePayload = await requestWithProviderCapacityRetry(
            providerId,
            "agent resume request",
            () =>
              client.request(
                "agent",
                {
                  sessionKey,
                  idempotencyKey: `idem-${randomUUID()}`,
                  message:
                    providerId === "codex-cli"
                      ? `Do not inspect files or run tools. Reply with exactly: CLI-RESUME-${resumeNonce}.`
                      : `Reply with exactly: CLI backend RESUME OK ${resumeNonce}.`,
                  deliver: false,
                  timeout: CLI_BACKEND_AGENT_TIMEOUT_SECONDS,
                },
                { expectFinal: true, timeoutMs: CLI_BACKEND_REQUEST_TIMEOUT_MS },
              ),
          );
          if (!resumePayload) {
            return;
          }
          if (resumePayload?.status !== "ok") {
            throw new Error(`resume status=${String(resumePayload?.status)}`);
          }
          logCliBackendLiveStep("agent-resume:done", { status: resumePayload?.status });
          const resumeText = extractPayloadText(resumePayload?.result);
          if (providerId === "codex-cli") {
            expect(resumeText).toContain(`CLI-RESUME-${resumeNonce}`);
          } else {
            expect(
              matchesCliBackendReply(resumeText, `CLI backend RESUME OK ${resumeNonce}.`),
            ).toBe(true);
          }
        }

        // --- Optional image probe ----------------------------------------------------
        if (enableCliImageProbe) {
          // codex-cli uses a fresh session for the image probe; other providers reuse the main one.
          const imageSessionKey =
            providerId === "codex-cli"
              ? `agent:dev:live-cli-backend-image:${randomUUID()}`
              : sessionKey;
          logCliBackendLiveStep("image-probe:start", { sessionKey: imageSessionKey });
          await verifyCliBackendImageProbe({
            client,
            providerId,
            sessionKey: imageSessionKey,
            tempDir,
            bootstrapWorkspace,
          });
          logCliBackendLiveStep("image-probe:done");
        }

        // --- Optional cron/MCP probes ------------------------------------------------
        if (enableCliMcpProbe) {
          logCliBackendLiveStep("cron-mcp-loopback-preflight:start", {
            sessionKey,
            senderIsOwner: true,
          });
          await verifyCliCronMcpLoopbackPreflight({
            sessionKey,
            port,
            token,
            env: process.env,
            senderIsOwner: true,
            expectedSchemaProbeToolName: schemaProbePluginPath
              ? MCP_SCHEMA_PROBE_TOOL_NAME
              : undefined,
          });
          logCliBackendLiveStep("cron-mcp-loopback-preflight:done");
          if (providerId === "codex-cli" && CLI_CI_SAFE_CODEX_CONFIG) {
            logCliBackendLiveStep("cron-mcp-probe:skipped", {
              providerId,
              reason: "ci-safe-codex-config",
            });
          } else {
            logCliBackendLiveStep("cron-mcp-probe:start", { sessionKey });
            await verifyCliCronMcpProbe({
              client,
              providerId,
              sessionKey,
              port,
              token,
              env: process.env,
            });
            logCliBackendLiveStep("cron-mcp-probe:done");
          }
        }
      } finally {
        await runCleanupSteps();
      }
    },
    CLI_BACKEND_LIVE_TIMEOUT_MS,
  );
});