mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
fix: harden claude-cli live switch smoke
This commit is contained in:
@@ -272,6 +272,7 @@ openclaw models list --json
|
||||
- `OPENCLAW_LIVE_CLI_BACKEND_IMAGE_ARG="--image"` to pass image file paths as CLI args instead of prompt injection.
|
||||
- `OPENCLAW_LIVE_CLI_BACKEND_IMAGE_MODE="repeat"` (or `"list"`) to control how image args are passed when `IMAGE_ARG` is set.
|
||||
- `OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE=1` to send a second turn and validate resume flow.
|
||||
- `OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE=0` to disable the default Claude Sonnet -> Opus same-session continuity probe (set to `1` to force it on when the selected model supports a switch target).
|
||||
|
||||
Example:
|
||||
|
||||
@@ -301,6 +302,7 @@ Notes:
|
||||
- It runs the live CLI-backend smoke inside the repo Docker image as the non-root `node` user.
|
||||
- It resolves CLI smoke metadata from the owning extension, then installs the matching Linux CLI package (`@anthropic-ai/claude-code`, `@openai/codex`, or `@google/gemini-cli`) into a cached writable prefix at `OPENCLAW_DOCKER_CLI_TOOLS_DIR` (default: `~/.cache/openclaw/docker-cli-tools`).
|
||||
- The live CLI-backend smoke now exercises the same end-to-end flow for Claude, Codex, and Gemini: text turn, image classification turn, then MCP `cron` tool call verified through the gateway CLI.
|
||||
- Claude's default smoke also patches the session from Sonnet to Opus and verifies the resumed session still remembers an earlier note.
|
||||
|
||||
## Live: ACP bind smoke (`/acp spawn ... --bind here`)
|
||||
|
||||
@@ -448,7 +450,7 @@ Live tests discover credentials the same way the CLI does. Practical implication
|
||||
- Per-agent auth profiles: `~/.openclaw/agents/<agentId>/agent/auth-profiles.json` (this is what “profile keys” means in the live tests)
|
||||
- Config: `~/.openclaw/openclaw.json` (or `OPENCLAW_CONFIG_PATH`)
|
||||
- Legacy state dir: `~/.openclaw/credentials/` (copied into the staged live home when present, but not the main profile-key store)
|
||||
- Live local runs copy the active config, per-agent `auth-profiles.json` files, legacy `credentials/`, and supported external CLI auth dirs into a temp test home by default; `agents.*.workspace` / `agentDir` path overrides are stripped in that staged config so probes stay off your real host workspace.
|
||||
- Live local runs copy the active config, per-agent `auth-profiles.json` files, legacy `credentials/`, and supported external CLI auth dirs into a temp test home by default; staged live homes skip `workspace/` and `sandboxes/`, and `agents.*.workspace` / `agentDir` path overrides are stripped so probes stay off your real host workspace.
|
||||
|
||||
If you want to rely on env keys (e.g. exported in your `~/.profile`), run local tests after `source ~/.profile`, or use the Docker runners below (they can mount `~/.profile` into the container).
|
||||
|
||||
|
||||
@@ -40,7 +40,14 @@ openclaw_live_stage_state_dir() {
|
||||
|
||||
mkdir -p "$dest_dir"
|
||||
if [ -d "$source_dir" ]; then
|
||||
tar -C "$source_dir" --exclude=workspace -cf - . | tar -C "$dest_dir" -xf -
|
||||
# Sandbox workspaces can accumulate root-owned artifacts from prior Docker
|
||||
# runs. They are not needed for live-test auth/config staging and can make
|
||||
# temp-dir cleanup fail on exit, so keep them out of the staged state copy.
|
||||
tar -C "$source_dir" \
|
||||
--exclude=workspace \
|
||||
--exclude=sandboxes \
|
||||
-cf - . | tar -C "$dest_dir" -xf -
|
||||
chmod -R u+rwX "$dest_dir" || true
|
||||
if [ -d "$source_dir/workspace" ] && [ ! -e "$dest_dir/workspace" ]; then
|
||||
ln -s "$source_dir/workspace" "$dest_dir/workspace"
|
||||
fi
|
||||
|
||||
@@ -210,6 +210,7 @@ docker run --rm -t \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_PRESERVE_ENV="${OPENCLAW_LIVE_CLI_BACKEND_PRESERVE_ENV:-}" \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_DISABLE_MCP_CONFIG="$CLI_DISABLE_MCP_CONFIG" \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE="${OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE:-}" \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE="${OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE:-}" \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_IMAGE_PROBE="${OPENCLAW_LIVE_CLI_BACKEND_IMAGE_PROBE:-}" \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_IMAGE_ARG="${OPENCLAW_LIVE_CLI_BACKEND_IMAGE_ARG:-}" \
|
||||
-e OPENCLAW_LIVE_CLI_BACKEND_IMAGE_MODE="${OPENCLAW_LIVE_CLI_BACKEND_IMAGE_MODE:-}" \
|
||||
|
||||
@@ -100,4 +100,26 @@ describe("gateway cli backend live helpers", () => {
|
||||
});
|
||||
expect(gatewayClientState.lastOptions).not.toHaveProperty("requestTimeoutMs");
|
||||
});
|
||||
|
||||
it("defaults the model switch probe to Claude Sonnet -> Opus", async () => {
  const { resolveCliModelSwitchProbeTarget, shouldRunCliModelSwitchProbe } =
    await import("./gateway-cli-backend.live-helpers.js");

  // Clear any override so the built-in default is what gets exercised, and put
  // the previous value back afterwards so this test does not leak env state.
  const previousProbeEnv = process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE;
  delete process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE;

  try {
    expect(resolveCliModelSwitchProbeTarget("claude-cli", "claude-cli/claude-sonnet-4-6")).toBe(
      "claude-cli/claude-opus-4-6",
    );
    expect(shouldRunCliModelSwitchProbe("claude-cli", "claude-cli/claude-sonnet-4-6")).toBe(true);
    expect(shouldRunCliModelSwitchProbe("claude-cli", "claude-cli/claude-opus-4-6")).toBe(false);
    expect(shouldRunCliModelSwitchProbe("codex-cli", "codex-cli/gpt-5.4")).toBe(false);
  } finally {
    if (previousProbeEnv === undefined) {
      delete process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE;
    } else {
      process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE = previousProbeEnv;
    }
  }
});
|
||||
|
||||
it("lets env disable the model switch probe", async () => {
  const { shouldRunCliModelSwitchProbe } = await import("./gateway-cli-backend.live-helpers.js");

  // Force the probe off for this assertion only; restore the prior value so
  // the "0" override does not leak into tests that run afterwards.
  const previousProbeEnv = process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE;
  process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE = "0";

  try {
    expect(shouldRunCliModelSwitchProbe("claude-cli", "claude-cli/claude-sonnet-4-6")).toBe(false);
  } finally {
    if (previousProbeEnv === undefined) {
      delete process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE;
    } else {
      process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE = previousProbeEnv;
    }
  }
});
|
||||
});
|
||||
|
||||
@@ -97,6 +97,29 @@ export function shouldRunCliMcpProbe(providerId: string): boolean {
|
||||
return resolveCliBackendLiveTest(providerId)?.defaultMcpProbe === true;
|
||||
}
|
||||
|
||||
/**
 * Resolves the model ref that the live model-switch probe should switch to.
 *
 * Both arguments are trimmed and lower-cased before comparison. Only the
 * `claude-cli` provider running `claude-cli/claude-sonnet-4-6` has a switch
 * target; every other combination yields `undefined`.
 *
 * @param providerId - CLI backend provider id, e.g. `claude-cli`.
 * @param modelRef - Provider-qualified model ref that is currently selected.
 * @returns The model ref to switch to, or `undefined` when there is none.
 */
export function resolveCliModelSwitchProbeTarget(
  providerId: string,
  modelRef: string,
): string | undefined {
  const normalizedProvider = providerId.trim().toLowerCase();
  const normalizedModelRef = modelRef.trim().toLowerCase();
  const isClaudeSonnet =
    normalizedProvider === "claude-cli" && normalizedModelRef === "claude-cli/claude-sonnet-4-6";
  return isClaudeSonnet ? "claude-cli/claude-opus-4-6" : undefined;
}
|
||||
|
||||
/**
 * Decides whether the live model-switch continuity probe should run.
 *
 * The probe can only run when `resolveCliModelSwitchProbeTarget` yields a
 * target for the given provider/model, so that is checked first: without a
 * target the answer is always `false`, even if the env override asks for the
 * probe. This keeps the flag in step with the target the caller resolves next.
 *
 * When a target exists, a non-empty `OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE`
 * value decides the outcome via `isTruthyEnvValue`; an unset or blank value
 * leaves the probe enabled by default.
 *
 * @param providerId - CLI backend provider id, e.g. `claude-cli`.
 * @param modelRef - Provider-qualified model ref that is currently selected.
 * @returns `true` when the probe should run for this provider/model.
 */
export function shouldRunCliModelSwitchProbe(providerId: string, modelRef: string): boolean {
  if (resolveCliModelSwitchProbeTarget(providerId, modelRef) === undefined) {
    return false;
  }
  const raw = process.env.OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE?.trim();
  return raw ? isTruthyEnvValue(raw) : true;
}
|
||||
|
||||
export function matchesCliBackendReply(text: string, expected: string): boolean {
|
||||
const normalized = text.trim();
|
||||
const target = expected.trim();
|
||||
|
||||
@@ -16,8 +16,10 @@ import {
|
||||
matchesCliBackendReply,
|
||||
parseImageMode,
|
||||
parseJsonStringArray,
|
||||
resolveCliModelSwitchProbeTarget,
|
||||
restoreCliBackendLiveEnv,
|
||||
shouldRunCliImageProbe,
|
||||
shouldRunCliModelSwitchProbe,
|
||||
shouldRunCliMcpProbe,
|
||||
snapshotCliBackendLiveEnv,
|
||||
type SystemPromptReport,
|
||||
@@ -81,11 +83,17 @@ describeLive("gateway live (cli backend)", () => {
|
||||
const backendResolved = resolveCliBackendConfig(providerId);
|
||||
const enableCliImageProbe = shouldRunCliImageProbe(providerId);
|
||||
const enableCliMcpProbe = shouldRunCliMcpProbe(providerId);
|
||||
const enableCliModelSwitchProbe = shouldRunCliModelSwitchProbe(providerId, modelKey);
|
||||
const modelSwitchTarget = enableCliModelSwitchProbe
|
||||
? resolveCliModelSwitchProbeTarget(providerId, modelKey)
|
||||
: undefined;
|
||||
logCliBackendLiveStep("model-selected", {
|
||||
providerId,
|
||||
modelKey,
|
||||
enableCliImageProbe,
|
||||
enableCliMcpProbe,
|
||||
enableCliModelSwitchProbe,
|
||||
modelSwitchTarget,
|
||||
});
|
||||
const providerDefaults = backendResolved?.config;
|
||||
|
||||
@@ -173,7 +181,10 @@ describeLive("gateway live (cli backend)", () => {
|
||||
...cfg.agents?.defaults,
|
||||
...(bootstrapWorkspace ? { workspace: bootstrapWorkspace.workspaceRootDir } : {}),
|
||||
model: { primary: modelKey },
|
||||
models: { [modelKey]: {} },
|
||||
models: {
|
||||
[modelKey]: {},
|
||||
...(modelSwitchTarget ? { [modelSwitchTarget]: {} } : {}),
|
||||
},
|
||||
cliBackends: {
|
||||
...existingBackends,
|
||||
[providerId]: {
|
||||
@@ -216,6 +227,8 @@ describeLive("gateway live (cli backend)", () => {
|
||||
try {
|
||||
const sessionKey = "agent:dev:live-cli-backend";
|
||||
const nonce = randomBytes(3).toString("hex").toUpperCase();
|
||||
const memoryNonce = randomBytes(3).toString("hex").toUpperCase();
|
||||
const memoryToken = `CLI-MEM-${memoryNonce}`;
|
||||
logCliBackendLiveStep("agent-request:start", { sessionKey, nonce });
|
||||
const payload = await client.request(
|
||||
"agent",
|
||||
@@ -225,7 +238,11 @@ describeLive("gateway live (cli backend)", () => {
|
||||
message:
|
||||
providerId === "codex-cli"
|
||||
? `Please include the token CLI-BACKEND-${nonce} in your reply.`
|
||||
: `Reply with exactly: CLI backend OK ${nonce}.`,
|
||||
: enableCliModelSwitchProbe
|
||||
? `Reply with exactly: CLI backend OK ${nonce}.` +
|
||||
` Also remember this session note for later: ${memoryToken}.` +
|
||||
" Do not include the note in your reply."
|
||||
: `Reply with exactly: CLI backend OK ${nonce}.`,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
@@ -250,7 +267,49 @@ describeLive("gateway live (cli backend)", () => {
|
||||
).toEqual(expect.arrayContaining(bootstrapWorkspace?.expectedInjectedFiles ?? []));
|
||||
}
|
||||
|
||||
if (CLI_RESUME) {
|
||||
if (modelSwitchTarget) {
|
||||
const switchNonce = randomBytes(3).toString("hex").toUpperCase();
|
||||
logCliBackendLiveStep("agent-switch:start", {
|
||||
sessionKey,
|
||||
fromModel: modelKey,
|
||||
toModel: modelSwitchTarget,
|
||||
switchNonce,
|
||||
memoryToken,
|
||||
});
|
||||
const patchPayload = await client.request("sessions.patch", {
|
||||
key: sessionKey,
|
||||
model: modelSwitchTarget,
|
||||
});
|
||||
if (!patchPayload || typeof patchPayload !== "object" || !("ok" in patchPayload)) {
|
||||
throw new Error(
|
||||
`sessions.patch failed for model switch: ${JSON.stringify(patchPayload)}`,
|
||||
);
|
||||
}
|
||||
const switchPayload = await client.request(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${randomUUID()}`,
|
||||
message:
|
||||
"We just switched from Claude Sonnet to Claude Opus in the same session. " +
|
||||
`What session note did I ask you to remember earlier? ` +
|
||||
`Reply with exactly: CLI backend SWITCH OK ${switchNonce} <remembered-note>.`,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
);
|
||||
if (switchPayload?.status !== "ok") {
|
||||
throw new Error(`switch status=${String(switchPayload?.status)}`);
|
||||
}
|
||||
logCliBackendLiveStep("agent-switch:done", { status: switchPayload?.status });
|
||||
const switchText = extractPayloadText(switchPayload?.result);
|
||||
expect(
|
||||
matchesCliBackendReply(
|
||||
switchText,
|
||||
`CLI backend SWITCH OK ${switchNonce} ${memoryToken}.`,
|
||||
),
|
||||
).toBe(true);
|
||||
} else if (CLI_RESUME) {
|
||||
const resumeNonce = randomBytes(3).toString("hex").toUpperCase();
|
||||
logCliBackendLiveStep("agent-resume:start", { sessionKey, resumeNonce });
|
||||
const resumePayload = await client.request(
|
||||
|
||||
Reference in New Issue
Block a user