mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-15 12:20:53 +00:00
fix(backup): retry tar EOF races and skip live volatile files
Fixes #72249.

Summary:
- retry live backup tar EOF races
- skip current live session, cron, log, and delivery-queue state files
- preserve workspace lock/temp files and keep backup --json parseable

Verification:
- Crabbox pre-fix repro: tbx_01kr5xt9vf5pas5ee4aefrp3am
- Crabbox post-fix proof: tbx_01kr5y3e1kbtt6chbypfdydbgs
- pnpm check:test-types
- pnpm lint:core
- pnpm test src/commands/backup.test.ts src/infra/backup-volatile-filter.test.ts src/infra/backup-create.test.ts
- CI on 37664570c7: green

Thanks @abnershang.
This commit is contained in:
@@ -146,6 +146,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- Docs/Subagents: correct the listed sub-agent bootstrap context files to include `SOUL.md`, `IDENTITY.md`, and `USER.md`. (#79470) Thanks @lastguru-net.
|
||||
- Backup: keep live backup archives from copying current agent session transcripts, cron run logs, and delivery queues while preserving workspace lock/temp files and keeping `--json` output parseable when volatile files are skipped. Fixes #72249. (#72251) Thanks @abnershang.
|
||||
- OpenAI/Codex: install the Codex runtime plugin from npm during OpenAI onboarding and load it automatically for implicit OpenAI model routes, while preserving manual PI runtime overrides. Fixes #79358.
|
||||
- OpenAI/realtime voice: defer `response.create` while a realtime response is still active, retry after `response.done`/`response.cancelled`, and align GA input transcription/noise-reduction defaults with the Codex realtime reference so Discord/Voice Call consult results can resume speaking instead of tripping the active-response race.
|
||||
- OpenAI/realtime voice: avoid duplicate barge-in cancellation requests, log realtime model interruption/cutoff events in Discord voice logs, and treat OpenAI's no-active-response cancellation reply as a completed cancel so Discord voice sessions do not wedge pending speech after fast interruptions.
|
||||
|
||||
@@ -53,6 +53,8 @@ skipped.
|
||||
|
||||
The archive payload stores file contents from those source trees, and the embedded `manifest.json` records the resolved absolute source paths plus the archive layout used for each asset.
|
||||
|
||||
During archive creation, OpenClaw skips known live-mutation files that do not have restoration value, including active agent session transcripts, cron run logs, rolling logs, delivery queues, socket/pid/temp files under the state directory, and related durable-queue temp files. The JSON result includes `skippedVolatileCount` so automation can see how many files were intentionally omitted.
|
||||
|
||||
Installed plugin source and manifest files under the state directory's
|
||||
`extensions/` tree are included, but their nested `node_modules/` dependency
|
||||
trees are skipped. Those dependencies are rebuildable install artifacts; after
|
||||
|
||||
@@ -316,6 +316,50 @@ describe("backup commands", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps volatile-skip notices out of json output", async () => {
|
||||
const stateDir = path.join(tempHome.home, ".openclaw");
|
||||
const backupDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-backups-json-"));
|
||||
try {
|
||||
const runtime = createBackupTestRuntime();
|
||||
await mockStateOnlyBackupPlan(stateDir);
|
||||
tarCreateMock.mockImplementationOnce(
|
||||
async (
|
||||
options: { file: string; filter?: (entryPath: string) => boolean },
|
||||
entryPaths: string[],
|
||||
) => {
|
||||
const manifestPath = entryPaths[0];
|
||||
const stateRoot = entryPaths[1];
|
||||
expect(manifestPath).toBeDefined();
|
||||
expect(stateRoot).toBeDefined();
|
||||
if (!manifestPath || !stateRoot) {
|
||||
throw new Error("backup test expected manifest and state entries");
|
||||
}
|
||||
expect(options.filter?.(manifestPath)).toBe(true);
|
||||
expect(
|
||||
options.filter?.(path.join(stateRoot, "agents", "main", "sessions", "s.jsonl")),
|
||||
).toBe(false);
|
||||
await fs.writeFile(options.file, "archive-bytes", "utf8");
|
||||
},
|
||||
);
|
||||
|
||||
const result = await backupCreateCommand(runtime, {
|
||||
output: backupDir,
|
||||
json: true,
|
||||
});
|
||||
|
||||
expect(result.skippedVolatileCount).toBe(1);
|
||||
expect(runtime.log).toHaveBeenCalledTimes(1);
|
||||
const payload = vi.mocked(runtime.log).mock.calls[0]?.[0];
|
||||
if (typeof payload !== "string") {
|
||||
throw new Error("backup test expected JSON string output");
|
||||
}
|
||||
expect(payload).not.toContain("Backup skipped");
|
||||
expect(JSON.parse(payload)).toMatchObject({ skippedVolatileCount: 1 });
|
||||
} finally {
|
||||
await fs.rm(backupDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("rejects output paths that would be created inside a backed-up directory", async () => {
|
||||
const stateDir = path.join(tempHome.home, ".openclaw");
|
||||
await fs.writeFile(path.join(stateDir, "openclaw.json"), JSON.stringify({}), "utf8");
|
||||
|
||||
@@ -22,7 +22,10 @@ export async function backupCreateCommand(
|
||||
runtime: RuntimeEnv,
|
||||
opts: BackupCreateOptions = {},
|
||||
): Promise<BackupCreateResult> {
|
||||
const result = await createBackupArchive(opts);
|
||||
const result = await createBackupArchive({
|
||||
...opts,
|
||||
log: opts.log ?? (opts.json ? undefined : (message: string) => runtime.log(message)),
|
||||
});
|
||||
if (opts.verify && !opts.dryRun) {
|
||||
const { backupVerifyCommand } = await loadBackupVerifyRuntime();
|
||||
await backupVerifyCommand(
|
||||
|
||||
@@ -6,6 +6,7 @@ import { backupVerifyCommand } from "../commands/backup-verify.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { withOpenClawTestState } from "../test-utils/openclaw-test-state.js";
|
||||
import {
|
||||
__test as backupCreateInternals,
|
||||
buildExtensionsNodeModulesFilter,
|
||||
createBackupArchive,
|
||||
formatBackupCreateSummary,
|
||||
@@ -23,6 +24,7 @@ function makeResult(overrides: Partial<BackupCreateResult> = {}): BackupCreateRe
|
||||
verified: false,
|
||||
assets: [],
|
||||
skipped: [],
|
||||
skippedVolatileCount: 0,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
@@ -106,6 +108,159 @@ describe("formatBackupCreateSummary", () => {
|
||||
])("$name", ({ result, expected }) => {
|
||||
expect(formatBackupCreateSummary(result)).toEqual(expected);
|
||||
});
|
||||
|
||||
it("surfaces the volatile skip count in the summary", () => {
|
||||
expect(
|
||||
formatBackupCreateSummary(
|
||||
makeResult({
|
||||
assets: [
|
||||
{
|
||||
kind: "state",
|
||||
sourcePath: "/state",
|
||||
archivePath: "archive/state",
|
||||
displayPath: "~/.openclaw",
|
||||
},
|
||||
],
|
||||
skippedVolatileCount: 3,
|
||||
}),
|
||||
),
|
||||
).toEqual([
|
||||
"Backup archive: /tmp/openclaw-backup.tar.gz",
|
||||
"Included 1 path:",
|
||||
"- state: ~/.openclaw",
|
||||
"Created /tmp/openclaw-backup.tar.gz",
|
||||
"Skipped 3 volatile files (live sessions, cron logs, queues, sockets, pid/tmp).",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isTarEofRaceError", () => {
|
||||
const { isTarEofRaceError } = backupCreateInternals;
|
||||
|
||||
it.each([
|
||||
"did not encounter expected EOF",
|
||||
"encountered unexpected EOF",
|
||||
"TAR_BAD_ARCHIVE: Unrecognized archive format",
|
||||
"Truncated input (needed 512 more bytes, only 0 available) (TAR_BAD_ARCHIVE)",
|
||||
])("matches tar-specific EOF-class error: %s", (message) => {
|
||||
expect(isTarEofRaceError(new Error(message))).toBe(true);
|
||||
});
|
||||
|
||||
it("matches errors by code even when the message is empty", () => {
|
||||
expect(isTarEofRaceError(Object.assign(new Error(""), { code: "EOF" }))).toBe(true);
|
||||
});
|
||||
|
||||
it.each([
|
||||
"EOF occurred in violation of protocol",
|
||||
"unexpected eof while reading",
|
||||
"ran out of EOF markers",
|
||||
"permission denied",
|
||||
"",
|
||||
])("does not match unrelated errors: %s", (message) => {
|
||||
expect(isTarEofRaceError(new Error(message))).toBe(false);
|
||||
});
|
||||
|
||||
it("rejects non-object inputs", () => {
|
||||
expect(isTarEofRaceError(null)).toBe(false);
|
||||
expect(isTarEofRaceError(undefined)).toBe(false);
|
||||
expect(isTarEofRaceError("did not encounter expected EOF")).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("writeTarArchiveWithRetry", () => {
|
||||
it("retries on EOF-class errors and eventually succeeds", async () => {
|
||||
const eofErr = Object.assign(new Error("did not encounter expected EOF"), {
|
||||
path: "/state/sessions/s-abc/transcript.jsonl",
|
||||
});
|
||||
const runTar = vi
|
||||
.fn<() => Promise<void>>()
|
||||
.mockRejectedValueOnce(eofErr)
|
||||
.mockRejectedValueOnce(eofErr)
|
||||
.mockResolvedValueOnce(undefined);
|
||||
const log = vi.fn();
|
||||
const sleep = vi.fn<(ms: number) => Promise<void>>().mockResolvedValue(undefined);
|
||||
|
||||
await backupCreateInternals.writeTarArchiveWithRetry({
|
||||
tempArchivePath: "/tmp/backup.tar.gz.tmp",
|
||||
runTar,
|
||||
log,
|
||||
sleepMs: sleep,
|
||||
});
|
||||
|
||||
expect(runTar).toHaveBeenCalledTimes(3);
|
||||
expect(sleep).toHaveBeenNthCalledWith(1, 10_000);
|
||||
expect(sleep).toHaveBeenNthCalledWith(2, 20_000);
|
||||
expect(log).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("surfaces the offending path and attempt count after exhausting retries", async () => {
|
||||
const eofErr = Object.assign(new Error("did not encounter expected EOF"), {
|
||||
path: "/state/logs/gateway.jsonl",
|
||||
});
|
||||
const runTar = vi.fn<() => Promise<void>>().mockRejectedValue(eofErr);
|
||||
const sleep = vi.fn<(ms: number) => Promise<void>>().mockResolvedValue(undefined);
|
||||
|
||||
await expect(
|
||||
backupCreateInternals.writeTarArchiveWithRetry({
|
||||
tempArchivePath: "/tmp/backup.tar.gz.tmp",
|
||||
runTar,
|
||||
sleepMs: sleep,
|
||||
}),
|
||||
).rejects.toThrow(/last offending path: \/state\/logs\/gateway\.jsonl, after 3 attempts/);
|
||||
expect(runTar).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it("lets callers reset per-attempt counters so retries report the final attempt's count, not a running sum", async () => {
|
||||
// Simulate the caller's pattern: a closure counter populated by a filter
|
||||
// that tar.c invokes while walking the tree. Each attempt re-walks the
|
||||
// same tree, so the runTar closure must reset the counter before calling
|
||||
// tar.c -- otherwise the reported count accumulates across attempts.
|
||||
let skippedVolatileCount = 0;
|
||||
const volatileFilesSeenPerAttempt = 5;
|
||||
let attempt = 0;
|
||||
|
||||
const eofErr = Object.assign(new Error("did not encounter expected EOF"), {
|
||||
path: "/state/sessions/s-abc/transcript.jsonl",
|
||||
});
|
||||
|
||||
const runTar = vi.fn<() => Promise<void>>().mockImplementation(async () => {
|
||||
attempt += 1;
|
||||
skippedVolatileCount = 0;
|
||||
for (let i = 0; i < volatileFilesSeenPerAttempt; i += 1) {
|
||||
skippedVolatileCount += 1;
|
||||
}
|
||||
if (attempt < 3) {
|
||||
throw eofErr;
|
||||
}
|
||||
});
|
||||
const sleep = vi.fn<(ms: number) => Promise<void>>().mockResolvedValue(undefined);
|
||||
|
||||
await backupCreateInternals.writeTarArchiveWithRetry({
|
||||
tempArchivePath: "/tmp/backup.tar.gz.tmp",
|
||||
runTar,
|
||||
sleepMs: sleep,
|
||||
});
|
||||
|
||||
expect(runTar).toHaveBeenCalledTimes(3);
|
||||
// Without the reset, this would be 15 (5 * 3 attempts). With the reset,
|
||||
// it equals the count from the final (successful) attempt.
|
||||
expect(skippedVolatileCount).toBe(volatileFilesSeenPerAttempt);
|
||||
});
|
||||
|
||||
it("does not retry on non-EOF errors", async () => {
|
||||
const runTar = vi.fn<() => Promise<void>>().mockRejectedValue(new Error("permission denied"));
|
||||
const sleep = vi.fn<(ms: number) => Promise<void>>().mockResolvedValue(undefined);
|
||||
|
||||
await expect(
|
||||
backupCreateInternals.writeTarArchiveWithRetry({
|
||||
tempArchivePath: "/tmp/backup.tar.gz.tmp",
|
||||
runTar,
|
||||
sleepMs: sleep,
|
||||
}),
|
||||
).rejects.toThrow(/permission denied/);
|
||||
expect(runTar).toHaveBeenCalledTimes(1);
|
||||
expect(sleep).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildExtensionsNodeModulesFilter", () => {
|
||||
@@ -131,6 +286,65 @@ describe("buildExtensionsNodeModulesFilter", () => {
|
||||
});
|
||||
|
||||
describe("createBackupArchive", () => {
|
||||
it("skips current live volatile state files while preserving workspace locks", async () => {
|
||||
await withOpenClawTestState(
|
||||
{
|
||||
layout: "split",
|
||||
prefix: "openclaw-backup-volatile-",
|
||||
scenario: "minimal",
|
||||
},
|
||||
async (state) => {
|
||||
const outputDir = state.path("backups");
|
||||
await state.writeConfig({
|
||||
agents: {
|
||||
list: [{ id: "main", default: true, workspace: state.workspaceDir }],
|
||||
},
|
||||
});
|
||||
await fs.mkdir(outputDir, { recursive: true });
|
||||
await fs.writeFile(path.join(state.workspaceDir, "Cargo.lock"), "workspace lock\n", "utf8");
|
||||
await fs.writeFile(
|
||||
path.join(state.workspaceDir, "pending.tmp"),
|
||||
"workspace temp fixture\n",
|
||||
"utf8",
|
||||
);
|
||||
await state.writeText("agents/main/sessions/live-session.jsonl", "session\n");
|
||||
await state.writeText("sessions/legacy-session.jsonl", "legacy session\n");
|
||||
await state.writeText("cron/runs/nightly.jsonl", "cron\n");
|
||||
await state.writeText("logs/gateway.log", "log\n");
|
||||
await state.writeJson("delivery-queue/message.json", { id: "delivery" });
|
||||
await state.writeJson("session-delivery-queue/message.json", { id: "session-delivery" });
|
||||
await state.writeText("tmp/staged.tmp", "tmp\n");
|
||||
await state.writeText("gateway.pid", "123\n");
|
||||
|
||||
const result = await createBackupArchive({
|
||||
output: outputDir,
|
||||
includeWorkspace: true,
|
||||
nowMs: Date.UTC(2026, 4, 9, 8, 0, 0),
|
||||
});
|
||||
const entries = await listArchiveEntries(result.archivePath);
|
||||
|
||||
expect(entries.some((entry) => entry.endsWith("/workspace/Cargo.lock"))).toBe(true);
|
||||
expect(entries.some((entry) => entry.endsWith("/workspace/pending.tmp"))).toBe(true);
|
||||
for (const suffix of [
|
||||
"/state/agents/main/sessions/live-session.jsonl",
|
||||
"/state/sessions/legacy-session.jsonl",
|
||||
"/state/cron/runs/nightly.jsonl",
|
||||
"/state/logs/gateway.log",
|
||||
"/state/delivery-queue/message.json",
|
||||
"/state/session-delivery-queue/message.json",
|
||||
"/state/tmp/staged.tmp",
|
||||
"/state/gateway.pid",
|
||||
]) {
|
||||
expect(
|
||||
entries.some((entry) => entry.endsWith(suffix)),
|
||||
suffix,
|
||||
).toBe(false);
|
||||
}
|
||||
expect(result.skippedVolatileCount).toBe(8);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("omits installed plugin node_modules from the real archive while keeping plugin files", async () => {
|
||||
await withOpenClawTestState(
|
||||
{
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
import { isPathWithin } from "../commands/cleanup-utils.js";
|
||||
import { resolveHomeDir, resolveUserPath } from "../utils.js";
|
||||
import { resolveRuntimeServiceVersion } from "../version.js";
|
||||
import { isVolatileBackupPath } from "./backup-volatile-filter.js";
|
||||
import { writeJson } from "./json-files.js";
|
||||
|
||||
type TarRuntime = typeof import("tar");
|
||||
@@ -32,6 +33,12 @@ export type BackupCreateOptions = {
|
||||
verify?: boolean;
|
||||
json?: boolean;
|
||||
nowMs?: number;
|
||||
/**
|
||||
* Optional info logger invoked for non-fatal backup events such as tar
|
||||
* retry notices or volatile-file skip counts. When omitted, events are
|
||||
* silent aside from the final result.
|
||||
*/
|
||||
log?: (message: string) => void;
|
||||
};
|
||||
|
||||
type BackupManifestAsset = {
|
||||
@@ -82,8 +89,91 @@ export type BackupCreateResult = {
|
||||
reason: string;
|
||||
coveredBy?: string;
|
||||
}>;
|
||||
/**
|
||||
* Count of files the archiver actively skipped because they matched the
|
||||
* known-volatile filter (live sessions, cron logs, queues, sockets, pid/tmp).
|
||||
* Populated on real writes only; dry runs report 0.
|
||||
*/
|
||||
skippedVolatileCount: number;
|
||||
};
|
||||
|
||||
const BACKUP_TAR_MAX_ATTEMPTS = 3;
|
||||
// Backoff between attempts: wait 10s before attempt 2, 20s before attempt 3.
|
||||
const BACKUP_TAR_BACKOFF_MS = [10_000, 20_000];
|
||||
|
||||
function isTarEofRaceError(err: unknown): boolean {
|
||||
if (!err || typeof err !== "object") {
|
||||
return false;
|
||||
}
|
||||
const code = (err as NodeJS.ErrnoException).code;
|
||||
if (code === "EOF") {
|
||||
return true;
|
||||
}
|
||||
// Keep this regex narrow: match only the two tar-specific EOF-class error
|
||||
// strings thrown by node-tar's WriteEntry#onread (grow and shrink races,
|
||||
// see node_modules/tar/dist/commonjs/write-entry.js around the
|
||||
// "did not encounter expected EOF" and "encountered unexpected EOF"
|
||||
// Object.assign sites), plus the TAR_BAD_ARCHIVE code surfaced by the
|
||||
// parser on truncated input. A bare /EOF/i alternative also matched
|
||||
// unrelated SSL/OpenSSL strings like "EOF occurred in violation of
|
||||
// protocol" and "unexpected eof while reading", causing pointless retries.
|
||||
const message = (err as Error).message ?? "";
|
||||
return /(did not encounter expected|encountered unexpected) EOF|TAR_BAD_ARCHIVE/i.test(message);
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
export type BackupTarRetryLogger = (message: string) => void;
|
||||
|
||||
async function writeTarArchiveWithRetry(params: {
|
||||
tempArchivePath: string;
|
||||
runTar: () => Promise<void>;
|
||||
log?: BackupTarRetryLogger;
|
||||
sleepMs?: (ms: number) => Promise<void>;
|
||||
}): Promise<void> {
|
||||
const sleepFn = params.sleepMs ?? sleep;
|
||||
let lastErr: unknown;
|
||||
for (let attempt = 1; attempt <= BACKUP_TAR_MAX_ATTEMPTS; attempt += 1) {
|
||||
try {
|
||||
await params.runTar();
|
||||
return;
|
||||
} catch (err) {
|
||||
lastErr = err;
|
||||
if (!isTarEofRaceError(err) || attempt === BACKUP_TAR_MAX_ATTEMPTS) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
await fs.rm(params.tempArchivePath, { force: true });
|
||||
} catch (cleanupErr) {
|
||||
const code = (cleanupErr as NodeJS.ErrnoException).code;
|
||||
if (code && code !== "ENOENT") {
|
||||
params.log?.(
|
||||
`Backup archiver could not remove temp archive ${params.tempArchivePath} between retries: ${code}. Continuing.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const backoff = BACKUP_TAR_BACKOFF_MS[attempt - 1] ?? 0;
|
||||
const offendingPath = (err as NodeJS.ErrnoException).path;
|
||||
params.log?.(
|
||||
`Backup archiver hit a live-write race${
|
||||
offendingPath ? ` on ${offendingPath}` : ""
|
||||
} (attempt ${attempt}/${BACKUP_TAR_MAX_ATTEMPTS}); retrying in ${Math.round(backoff / 1000)}s.`,
|
||||
);
|
||||
await sleepFn(backoff);
|
||||
}
|
||||
}
|
||||
const final = lastErr instanceof Error ? lastErr : new Error(String(lastErr));
|
||||
const offendingPath = (lastErr as NodeJS.ErrnoException | undefined)?.path;
|
||||
const suffix = offendingPath
|
||||
? ` (last offending path: ${offendingPath}, after ${BACKUP_TAR_MAX_ATTEMPTS} attempts)`
|
||||
: ` (after ${BACKUP_TAR_MAX_ATTEMPTS} attempts)`;
|
||||
throw new Error(`Backup archive write failed: ${final.message}${suffix}`, { cause: final });
|
||||
}
|
||||
|
||||
export const __test = { writeTarArchiveWithRetry, isTarEofRaceError };
|
||||
|
||||
async function resolveOutputPath(params: {
|
||||
output?: string;
|
||||
nowMs: number;
|
||||
@@ -259,6 +349,13 @@ export function formatBackupCreateSummary(result: BackupCreateResult): string[]
|
||||
lines.push("Dry run only; archive was not written.");
|
||||
} else {
|
||||
lines.push(`Created ${result.archivePath}`);
|
||||
if (result.skippedVolatileCount > 0) {
|
||||
lines.push(
|
||||
`Skipped ${result.skippedVolatileCount} volatile file${
|
||||
result.skippedVolatileCount === 1 ? "" : "s"
|
||||
} (live sessions, cron logs, queues, sockets, pid/tmp).`,
|
||||
);
|
||||
}
|
||||
if (result.verified) {
|
||||
lines.push("Archive verification: passed");
|
||||
}
|
||||
@@ -344,6 +441,7 @@ export async function createBackupArchive(
|
||||
verified: false,
|
||||
assets: plan.included,
|
||||
skipped: plan.skipped,
|
||||
skippedVolatileCount: 0,
|
||||
};
|
||||
|
||||
if (opts.dryRun) {
|
||||
@@ -371,24 +469,61 @@ export async function createBackupArchive(
|
||||
|
||||
const tar = await loadTarRuntime();
|
||||
const stateAsset = result.assets.find((asset) => asset.kind === "state");
|
||||
const filter = stateAsset ? buildExtensionsNodeModulesFilter(stateAsset.sourcePath) : undefined;
|
||||
await tar.c(
|
||||
{
|
||||
file: tempArchivePath,
|
||||
...(filter ? { filter } : {}),
|
||||
gzip: true,
|
||||
portable: true,
|
||||
preservePaths: true,
|
||||
onWriteEntry: (entry) => {
|
||||
entry.path = remapArchiveEntryPath({
|
||||
entryPath: entry.path,
|
||||
manifestPath,
|
||||
archiveRoot,
|
||||
});
|
||||
},
|
||||
const extensionsFilter = stateAsset
|
||||
? buildExtensionsNodeModulesFilter(stateAsset.sourcePath)
|
||||
: undefined;
|
||||
const volatilePlan = { stateDirs: [stateAsset?.sourcePath ?? plan.stateDir] };
|
||||
let skippedVolatileCount = 0;
|
||||
const tarFilter = (entryPath: string): boolean => {
|
||||
// The manifest is staged in a tmp dir outside any state directory and
|
||||
// is always safe to include.
|
||||
if (path.resolve(entryPath) === manifestPath) {
|
||||
return true;
|
||||
}
|
||||
if (extensionsFilter && !extensionsFilter(entryPath)) {
|
||||
return false;
|
||||
}
|
||||
if (isVolatileBackupPath(entryPath, volatilePlan)) {
|
||||
skippedVolatileCount += 1;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
await writeTarArchiveWithRetry({
|
||||
tempArchivePath,
|
||||
log: opts.log,
|
||||
runTar: () => {
|
||||
// tar.c re-walks the tree (and thus re-invokes tarFilter) on every
|
||||
// attempt, so reset the closure counter here or retries would report
|
||||
// cumulative skip counts across attempts instead of the final one.
|
||||
skippedVolatileCount = 0;
|
||||
return tar.c(
|
||||
{
|
||||
file: tempArchivePath,
|
||||
gzip: true,
|
||||
portable: true,
|
||||
preservePaths: true,
|
||||
filter: tarFilter,
|
||||
onWriteEntry: (entry) => {
|
||||
entry.path = remapArchiveEntryPath({
|
||||
entryPath: entry.path,
|
||||
manifestPath,
|
||||
archiveRoot,
|
||||
});
|
||||
},
|
||||
},
|
||||
[manifestPath, ...result.assets.map((asset) => asset.sourcePath)],
|
||||
);
|
||||
},
|
||||
[manifestPath, ...result.assets.map((asset) => asset.sourcePath)],
|
||||
);
|
||||
});
|
||||
result.skippedVolatileCount = skippedVolatileCount;
|
||||
if (skippedVolatileCount > 0) {
|
||||
opts.log?.(
|
||||
`Backup skipped ${skippedVolatileCount} volatile file${
|
||||
skippedVolatileCount === 1 ? "" : "s"
|
||||
} (live sessions, cron logs, queues, sockets, pid/tmp).`,
|
||||
);
|
||||
}
|
||||
await publishTempArchive({ tempArchivePath, outputPath });
|
||||
} finally {
|
||||
await fs.rm(tempArchivePath, { force: true }).catch(() => undefined);
|
||||
|
||||
src/infra/backup-volatile-filter.test.ts — new file, 120 lines
@@ -0,0 +1,120 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { isVolatileBackupPath } from "./backup-volatile-filter.js";
|
||||
|
||||
const stateDir = "/opt/openclaw/state";
|
||||
const plan = { stateDirs: [stateDir] };
|
||||
|
||||
describe("isVolatileBackupPath", () => {
|
||||
it.each([
|
||||
// volatile: session transcripts
|
||||
[`${stateDir}/sessions/s-abc/transcript.jsonl`, true],
|
||||
[`${stateDir}/sessions/s-abc/run.log`, true],
|
||||
[`${stateDir}/agents/main/sessions/transcript.jsonl`, true],
|
||||
[`${stateDir}/agents/ops/sessions/run.log`, true],
|
||||
// volatile: cron run logs
|
||||
[`${stateDir}/cron/runs/2026-01-01/job.log`, true],
|
||||
[`${stateDir}/cron/runs/nightly.jsonl`, true],
|
||||
// volatile: generic state logs
|
||||
[`${stateDir}/logs/gateway.jsonl`, true],
|
||||
[`${stateDir}/logs/nested/gateway.log`, true],
|
||||
// volatile: sockets/pids/tmp under state
|
||||
[`${stateDir}/ipc/gateway.sock`, true],
|
||||
[`${stateDir}/gateway.pid`, true],
|
||||
[`${stateDir}/tmp/pending.tmp`, true],
|
||||
[`${stateDir}/delivery-queue/pending.tmp`, true],
|
||||
[`${stateDir}/session-delivery-queue/pending.tmp`, true],
|
||||
|
||||
// non-volatile: session config, not jsonl/log
|
||||
[`${stateDir}/sessions/s-abc/meta.json`, false],
|
||||
[`${stateDir}/agents/main/sessions/sessions.json`, false],
|
||||
// non-volatile: cron definitions
|
||||
[`${stateDir}/cron/jobs.json`, false],
|
||||
// non-volatile: cron runs but wrong extension
|
||||
[`${stateDir}/cron/runs/2026-01-01/job.json`, false],
|
||||
// non-volatile: plain config
|
||||
[`${stateDir}/config.json`, false],
|
||||
// non-volatile: workspace files outside state
|
||||
["/home/user/project/README.md", false],
|
||||
["/home/user/project/Cargo.lock", false],
|
||||
["/home/user/project/pending.tmp", false],
|
||||
// non-volatile: log-like name outside scope
|
||||
["/home/user/notes/daily.log", false],
|
||||
])("classifies %s as volatile=%s", (p, expected) => {
|
||||
expect(isVolatileBackupPath(p, plan)).toBe(expected);
|
||||
});
|
||||
|
||||
it("returns false when no state dirs are provided", () => {
|
||||
expect(
|
||||
isVolatileBackupPath(`${stateDir}/sessions/s-abc/transcript.jsonl`, { stateDirs: [] }),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not skip transient extensions without a state anchor", () => {
|
||||
expect(isVolatileBackupPath("/any/path/daemon.sock", { stateDirs: [] })).toBe(false);
|
||||
expect(isVolatileBackupPath("/any/path/daemon.pid", { stateDirs: [] })).toBe(false);
|
||||
expect(isVolatileBackupPath("/any/path/Cargo.lock", { stateDirs: [] })).toBe(false);
|
||||
});
|
||||
|
||||
it("does not match paths that escape the anchor via `..`", () => {
|
||||
// `/opt/openclaw/state/sessions/../config.jsonl` resolves to
|
||||
// `/opt/openclaw/state/config.jsonl`, which is NOT inside sessions/.
|
||||
expect(isVolatileBackupPath(`${stateDir}/sessions/../config.jsonl`, plan)).toBe(false);
|
||||
expect(isVolatileBackupPath(`${stateDir}/cron/runs/../jobs.log`, plan)).toBe(false);
|
||||
expect(isVolatileBackupPath(`${stateDir}/logs/../notes.jsonl`, plan)).toBe(false);
|
||||
});
|
||||
|
||||
it("treats delivery-queue json files under stateDir as volatile", () => {
|
||||
expect(
|
||||
isVolatileBackupPath(
|
||||
`${stateDir}/delivery-queue/3fac5e46-42dc-4230-a725-51c203830b4f.json`,
|
||||
plan,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("treats nested delivery-queue json files under stateDir as volatile", () => {
|
||||
expect(
|
||||
isVolatileBackupPath(
|
||||
`${stateDir}/delivery-queue/subdir/3fac5e46-42dc-4230-a725-51c203830b4f.json`,
|
||||
plan,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("does not treat non-json delivery-queue files as volatile", () => {
|
||||
expect(isVolatileBackupPath(`${stateDir}/delivery-queue/README.md`, plan)).toBe(false);
|
||||
});
|
||||
|
||||
it("does not treat delivery-queue json outside stateDir as volatile", () => {
|
||||
expect(isVolatileBackupPath(`/tmp/delivery-queue/file.json`, plan)).toBe(false);
|
||||
});
|
||||
|
||||
it("normalizes Windows-style separators before anchor checks", () => {
|
||||
const winStateDir = "C:\\openclaw\\state";
|
||||
const winPlan = { stateDirs: [winStateDir] };
|
||||
expect(isVolatileBackupPath(`${winStateDir}\\sessions\\s-abc\\transcript.jsonl`, winPlan)).toBe(
|
||||
true,
|
||||
);
|
||||
expect(isVolatileBackupPath(`${winStateDir}\\agents\\main\\sessions\\s.jsonl`, winPlan)).toBe(
|
||||
true,
|
||||
);
|
||||
expect(isVolatileBackupPath(`${winStateDir}\\cron\\runs\\2026\\job.jsonl`, winPlan)).toBe(true);
|
||||
// `..` escape via backslashes must also be rejected.
|
||||
expect(isVolatileBackupPath(`${winStateDir}\\sessions\\..\\config.jsonl`, winPlan)).toBe(false);
|
||||
});
|
||||
|
||||
it("matches tar filter paths when node-tar omits the leading slash", () => {
|
||||
expect(
|
||||
isVolatileBackupPath("opt/openclaw/state/agents/main/sessions/transcript.jsonl", plan),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("treats session-delivery-queue json files under stateDir as volatile", () => {
|
||||
expect(
|
||||
isVolatileBackupPath(
|
||||
`${stateDir}/session-delivery-queue/3fac5e46-42dc-4230-a725-51c203830b4f.json`,
|
||||
plan,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
src/infra/backup-volatile-filter.ts — new file, 131 lines
@@ -0,0 +1,131 @@
|
||||
import path from "node:path";
|
||||
|
||||
/**
|
||||
* Paths that are known to change during a live backup and commonly trigger
|
||||
* tar EOF errors. These files are actively appended to (logs, sockets, pid
|
||||
* markers) while `tar.c()` is reading them, which races with the size recorded
|
||||
* at `lstat()` time.
|
||||
*
|
||||
* Skipping them is safe: they are either recreated on startup, are transient
|
||||
* by nature, or have durable equivalents elsewhere in state. Snapshotting a
|
||||
* partial tail of a live log has no restoration value.
|
||||
*/
|
||||
|
||||
const STATE_TRANSIENT_EXTENSIONS = new Set([".sock", ".pid", ".tmp"]);
|
||||
|
||||
function normalizePosix(input: string): string {
|
||||
if (!input) {
|
||||
return input;
|
||||
}
|
||||
// Swap Windows-style separators, then collapse `.`/`..` segments so ancestry
|
||||
// checks cannot be bypassed by a path that traverses out of the anchor.
|
||||
return path.posix.normalize(input.replaceAll("\\", "/"));
|
||||
}
|
||||
|
||||
function isUnder(childPosix: string, parentPosix: string): boolean {
|
||||
if (!parentPosix) {
|
||||
return false;
|
||||
}
|
||||
const p = parentPosix.endsWith("/") ? parentPosix : `${parentPosix}/`;
|
||||
return childPosix === parentPosix || childPosix.startsWith(p);
|
||||
}
|
||||
|
||||
function hasExtension(filePosix: string, extensions: readonly string[]): boolean {
|
||||
const ext = path.posix.extname(filePosix).toLowerCase();
|
||||
return extensions.includes(ext);
|
||||
}
|
||||
|
||||
function hasExtensionInSet(filePosix: string, extensions: ReadonlySet<string>): boolean {
|
||||
return extensions.has(path.posix.extname(filePosix).toLowerCase());
|
||||
}
|
||||
|
||||
function isAgentSessionTranscriptPath(filePosix: string, stateDirPosix: string): boolean {
|
||||
const agentsRoot = path.posix.join(stateDirPosix, "agents");
|
||||
if (!isUnder(filePosix, agentsRoot)) {
|
||||
return false;
|
||||
}
|
||||
const relative = path.posix.relative(agentsRoot, filePosix);
|
||||
const parts = relative.split("/").filter(Boolean);
|
||||
return parts.length >= 3 && parts[1] === "sessions";
|
||||
}
|
||||
|
||||
function filePathCandidates(input: string): string[] {
|
||||
const normalized = normalizePosix(input);
|
||||
if (normalized.startsWith("/") || /^[A-Za-z]:\//u.test(normalized)) {
|
||||
return [normalized];
|
||||
}
|
||||
// node-tar may pass absolute input paths to filters without the leading
|
||||
// slash, even when the source list used absolute paths.
|
||||
return [normalized, normalizePosix(`/${normalized}`)];
|
||||
}
|
||||
|
||||
export type VolatileFilterPlan = {
|
||||
/** Canonical state directories the filter should treat as volatile anchors. */
|
||||
stateDirs: string[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns true if the given absolute path should be skipped during backup
|
||||
* because it is a live-mutation target.
|
||||
*
|
||||
* Rules:
|
||||
* - `{stateDir}/sessions/**`/`*.{jsonl,log}` (legacy)
|
||||
* - `{stateDir}/agents/<agentId>/sessions/**`/`*.{jsonl,log}`
|
||||
* - `{stateDir}/cron/runs/**`/`*.{jsonl,log}`
|
||||
* - `{stateDir}/logs/**`/`*.{jsonl,log}`
|
||||
* - `{stateDir}/{delivery-queue,session-delivery-queue}/**`/`*.{json,tmp}`
|
||||
* - `{stateDir}/**`/`*.{sock,pid,tmp}`
|
||||
*/
|
||||
export function isVolatileBackupPath(absolutePath: string, plan: VolatileFilterPlan): boolean {
|
||||
if (!absolutePath) {
|
||||
return false;
|
||||
}
|
||||
const candidates = filePathCandidates(absolutePath);
|
||||
|
||||
for (const stateDir of plan.stateDirs) {
|
||||
if (!stateDir) {
|
||||
continue;
|
||||
}
|
||||
const stateDirPosix = normalizePosix(stateDir);
|
||||
|
||||
for (const filePosix of candidates) {
|
||||
const sessionsRoot = path.posix.join(stateDirPosix, "sessions");
|
||||
if (isUnder(filePosix, sessionsRoot) && hasExtension(filePosix, [".jsonl", ".log"])) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
isAgentSessionTranscriptPath(filePosix, stateDirPosix) &&
|
||||
hasExtension(filePosix, [".jsonl", ".log"])
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const cronRunsRoot = path.posix.join(stateDirPosix, "cron", "runs");
|
||||
if (isUnder(filePosix, cronRunsRoot) && hasExtension(filePosix, [".jsonl", ".log"])) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const logsRoot = path.posix.join(stateDirPosix, "logs");
|
||||
if (isUnder(filePosix, logsRoot) && hasExtension(filePosix, [".jsonl", ".log"])) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (const queueDir of ["delivery-queue", "session-delivery-queue"]) {
|
||||
const queueRoot = path.posix.join(stateDirPosix, queueDir);
|
||||
if (isUnder(filePosix, queueRoot) && hasExtension(filePosix, [".json", ".tmp"])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
isUnder(filePosix, stateDirPosix) &&
|
||||
hasExtensionInSet(filePosix, STATE_TRANSIENT_EXTENSIONS)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
Reference in New Issue
Block a user