fix(cli-runner): drop stale claude-cli sessionId when transcript missing (#77011)

Probe ~/.claude/projects/.../<sid>.jsonl in prepareCliRunContext before
emitting `claude --resume <sid>`. When the on-disk transcript no longer
exists (e.g. after a half-installed update.run, manual prune, or Claude
CLI reinstall), drop the saved cliSessionBinding so this turn starts a
fresh session instead of timing out on a dead resume target. The post-run
session-store flow then writes the new sessionId back, ending the loop.
This commit is contained in:
openperf
2026-05-04 09:23:38 +08:00
committed by Peter Steinberger
parent 708c7cd2e2
commit 7e296aef4b
4 changed files with 172 additions and 15 deletions

View File

@@ -43,6 +43,7 @@ Docs: https://docs.openclaw.ai
- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers.
- Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent.
- Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf.
- Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc.
- Release validation: let Windows packaged-upgrade checks continue after the shipped 2026.5.2 updater hits its native-module swap cleanup fallback, verifying the fallback-installed candidate through package metadata and downstream smoke instead of crashing on the immediate update-status probe. Thanks @vincentkoc.
- Doctor/plugins: skip channel-derived official plugin installs when another configured plugin is the effective owner for the same channel, so `doctor --repair` does not reinstall `feishu` while `openclaw-lark` handles `channels.feishu`. Fixes #76623. Thanks @fuyizheng3120.

View File

@@ -19,6 +19,11 @@ vi.mock("../../plugins/hook-runner-global.js", () => ({
getGlobalHookRunner: vi.fn(() => null),
}));
// Replace the anthropic-cli SDK module with a minimal stub: the backend id
// constant is "claude-cli" and the provider predicate matches only that exact id.
vi.mock("../../plugin-sdk/anthropic-cli.js", () => ({
CLAUDE_CLI_BACKEND_ID: "claude-cli",
isClaudeCliProvider: (providerId: string) => providerId === "claude-cli",
}));
// Stub the TTS module so buildTtsSystemPromptHint always returns undefined.
vi.mock("../../tts/tts.js", () => ({
buildTtsSystemPromptHint: vi.fn(() => undefined),
}));
@@ -668,4 +673,128 @@ describe("shouldSkipLocalCliCredentialEpoch", () => {
fs.rmSync(dir, { recursive: true, force: true });
}
});
// Regression test for #77011: when the transcript probe reports no content for
// the saved claude-cli sessionId, preparation must yield an invalidated session
// (reason "missing-transcript") rather than a resumable sessionId.
it("drops the claude-cli sessionId when the on-disk transcript is missing (#77011)", async () => {
  const fixture = createSessionFile();
  const staleSessionId = "stale-claude-sid";
  try {
    // Register a single claude-cli backend that supports `--resume {sessionId}`.
    cliBackendsTesting.setDepsForTest({
      resolvePluginSetupCliBackend: () => undefined,
      resolveRuntimeCliBackends: () => [
        {
          id: "claude-cli",
          pluginId: "anthropic",
          bundleMcp: false,
          config: {
            command: "claude",
            args: ["--print"],
            resumeArgs: ["--resume", "{sessionId}"],
            output: "jsonl",
            input: "stdin",
            sessionMode: "existing",
          },
        },
      ],
    });

    // Stubbed probe: report that the transcript has no content.
    const probeTranscript = vi.fn(async () => false);
    setCliRunnerPrepareTestDeps({ claudeCliSessionTranscriptHasContent: probeTranscript });

    const prepared = await prepareCliRunContext({
      sessionId: "session-test",
      sessionKey: "agent:main:telegram:direct:peer",
      sessionFile: fixture.sessionFile,
      workspaceDir: fixture.dir,
      prompt: "follow-up",
      provider: "claude-cli",
      model: "opus",
      timeoutMs: 1_000,
      runId: "run-77011-missing",
      cliSessionBinding: { sessionId: staleSessionId },
      cliSessionId: staleSessionId,
      config: createCliBackendConfig({ systemPromptOverride: null }),
    });

    expect(probeTranscript).toHaveBeenCalledWith({ sessionId: staleSessionId });
    expect(prepared.reusableCliSession).toEqual({ invalidatedReason: "missing-transcript" });
  } finally {
    // Always remove the temporary workspace, even if an assertion failed.
    fs.rmSync(fixture.dir, { recursive: true, force: true });
  }
});
// Counterpart to the missing-transcript case: when the probe reports content for
// the saved claude-cli sessionId, the prepared context keeps that sessionId.
it("keeps the claude-cli sessionId when the on-disk transcript is present", async () => {
  const fixture = createSessionFile();
  const liveSessionId = "live-claude-sid";
  try {
    // Register a single claude-cli backend that supports `--resume {sessionId}`.
    cliBackendsTesting.setDepsForTest({
      resolvePluginSetupCliBackend: () => undefined,
      resolveRuntimeCliBackends: () => [
        {
          id: "claude-cli",
          pluginId: "anthropic",
          bundleMcp: false,
          config: {
            command: "claude",
            args: ["--print"],
            resumeArgs: ["--resume", "{sessionId}"],
            output: "jsonl",
            input: "stdin",
            sessionMode: "existing",
          },
        },
      ],
    });

    // Stubbed probe: report that the transcript exists and has content.
    const probeTranscript = vi.fn(async () => true);
    setCliRunnerPrepareTestDeps({ claudeCliSessionTranscriptHasContent: probeTranscript });

    const prepared = await prepareCliRunContext({
      sessionId: "session-test",
      sessionKey: "agent:main:telegram:direct:peer",
      sessionFile: fixture.sessionFile,
      workspaceDir: fixture.dir,
      prompt: "follow-up",
      provider: "claude-cli",
      model: "opus",
      timeoutMs: 1_000,
      runId: "run-77011-present",
      cliSessionBinding: { sessionId: liveSessionId },
      cliSessionId: liveSessionId,
      config: createCliBackendConfig({ systemPromptOverride: null }),
    });

    expect(probeTranscript).toHaveBeenCalledWith({ sessionId: liveSessionId });
    expect(prepared.reusableCliSession).toEqual({ sessionId: liveSessionId });
  } finally {
    // Always remove the temporary workspace, even if an assertion failed.
    fs.rmSync(fixture.dir, { recursive: true, force: true });
  }
});
// The transcript probe is claude-cli specific: for any other provider it must
// never be invoked, and the saved binding's sessionId is reused unchanged.
it("does not probe the transcript for non-claude-cli providers", async () => {
  const fixture = createSessionFile();
  const boundSessionId = "test-cli-sid";
  try {
    // The stub would report "missing" if called, so a leaked call would also
    // surface as an unexpected invalidation below.
    const probeTranscript = vi.fn(async () => false);
    setCliRunnerPrepareTestDeps({ claudeCliSessionTranscriptHasContent: probeTranscript });

    const prepared = await prepareCliRunContext({
      sessionId: "session-test",
      sessionFile: fixture.sessionFile,
      workspaceDir: fixture.dir,
      prompt: "latest ask",
      provider: "test-cli",
      model: "test-model",
      timeoutMs: 1_000,
      runId: "run-77011-other-provider",
      cliSessionBinding: { sessionId: boundSessionId },
      config: createCliBackendConfig({ systemPromptOverride: null }),
    });

    expect(probeTranscript).not.toHaveBeenCalled();
    expect(prepared.reusableCliSession).toEqual({ sessionId: boundSessionId });
  } finally {
    // Always remove the temporary workspace, even if an assertion failed.
    fs.rmSync(fixture.dir, { recursive: true, force: true });
  }
});
});

View File

@@ -4,6 +4,7 @@ import {
createMcpLoopbackServerConfig,
getActiveMcpLoopbackRuntime,
} from "../../gateway/mcp-http.loopback-runtime.js";
import { isClaudeCliProvider } from "../../plugin-sdk/anthropic-cli.js";
import type {
CliBackendAuthEpochMode,
CliBackendPreparedExecution,
@@ -29,6 +30,7 @@ import {
import { CLI_AUTH_EPOCH_VERSION, resolveCliAuthEpoch } from "../cli-auth-epoch.js";
import { resolveCliBackendConfig } from "../cli-backends.js";
import { hashCliSessionText, resolveCliSessionReuse } from "../cli-session.js";
import { claudeCliSessionTranscriptHasContent } from "../command/attempt-execution.helpers.js";
import { resolveHeartbeatPromptForSystemPrompt } from "../heartbeat-system-prompt.js";
import {
resolveBootstrapMaxChars,
@@ -51,7 +53,7 @@ import {
loadCliSessionHistoryMessages,
loadCliSessionReseedMessages,
} from "./session-history.js";
import type { PreparedCliRunContext, RunCliAgentParams } from "./types.js";
import type { CliReusableSession, PreparedCliRunContext, RunCliAgentParams } from "./types.js";
const prepareDeps = {
makeBootstrapWarn: makeBootstrapWarnImpl,
@@ -62,6 +64,9 @@ const prepareDeps = {
resolveOpenClawReferencePaths: async (
params: Parameters<typeof import("../docs-path.js").resolveOpenClawReferencePaths>[0],
) => (await import("../docs-path.js")).resolveOpenClawReferencePaths(params),
// Surfaced as a dep so tests can stub the on-disk Claude CLI transcript probe
// without touching ~/.claude/projects.
claudeCliSessionTranscriptHasContent,
};
export function setCliRunnerPrepareTestDeps(overrides: Partial<typeof prepareDeps>): void {
@@ -256,19 +261,36 @@ export async function prepareCliRunContext(
...(preparedBackendEnv ? { env: preparedBackendEnv } : {}),
...(preparedBackendCleanup ? { cleanup: preparedBackendCleanup } : {}),
};
const reusableCliSession = params.cliSessionBinding
? resolveCliSessionReuse({
binding: params.cliSessionBinding,
authProfileId: effectiveAuthProfileId,
authEpoch,
authEpochVersion: CLI_AUTH_EPOCH_VERSION,
extraSystemPromptHash,
mcpConfigHash: preparedBackendFinal.mcpConfigHash,
mcpResumeHash: preparedBackendFinal.mcpResumeHash,
})
: params.cliSessionId
? { sessionId: params.cliSessionId }
: {};
// Pre-flight: if a saved Claude CLI sessionId points at a transcript that no
// longer exists on disk (e.g. update.run aborted mid-swap, Claude CLI was
// reinstalled, or the projects tree was manually pruned), `claude --resume`
// hangs or fails outside the cli-runner session_expired path. The persisted
// binding then never gets refreshed, causing every subsequent turn to retry
// the same dead sessionId. Drop the binding here so this turn starts fresh
// and the post-run flow writes the new sessionId back via setCliSessionBinding.
const candidateClaudeCliSessionId =
params.cliSessionBinding?.sessionId?.trim() || params.cliSessionId?.trim() || undefined;
const claudeCliTranscriptMissing =
candidateClaudeCliSessionId !== undefined &&
isClaudeCliProvider(params.provider) &&
!(await prepareDeps.claudeCliSessionTranscriptHasContent({
sessionId: candidateClaudeCliSessionId,
}));
const reusableCliSession: CliReusableSession = claudeCliTranscriptMissing
? { invalidatedReason: "missing-transcript" }
: params.cliSessionBinding
? resolveCliSessionReuse({
binding: params.cliSessionBinding,
authProfileId: effectiveAuthProfileId,
authEpoch,
authEpochVersion: CLI_AUTH_EPOCH_VERSION,
extraSystemPromptHash,
mcpConfigHash: preparedBackendFinal.mcpConfigHash,
mcpResumeHash: preparedBackendFinal.mcpResumeHash,
})
: params.cliSessionId
? { sessionId: params.cliSessionId }
: {};
if (reusableCliSession.invalidatedReason) {
cliBackendLog.info(
`cli session reset: provider=${params.provider} reason=${reusableCliSession.invalidatedReason}`,

View File

@@ -78,7 +78,12 @@ export type CliPreparedBackend = {
/**
 * Result of deciding whether a previously saved CLI session can be reused for
 * the current run.
 *
 * Both fields are optional; an empty object is also a valid value.
 */
export type CliReusableSession = {
/** Id of the CLI session to reuse, when one is available. */
sessionId?: string;
invalidatedReason?: "auth-profile" | "auth-epoch" | "system-prompt" | "mcp";
/**
 * Why a saved session was rejected. "missing-transcript" marks a Claude CLI
 * session whose transcript probe reported no content.
 * NOTE(review): the other reasons presumably map to mismatches in the saved
 * binding (auth profile, auth epoch, system prompt hash, MCP hashes) — confirm
 * against resolveCliSessionReuse.
 */
invalidatedReason?:
| "auth-profile"
| "auth-epoch"
| "system-prompt"
| "mcp"
| "missing-transcript";
};
export type PreparedCliRunContext = {