From 043cb32aab7dfd3747552b7bb707949cdc613de7 Mon Sep 17 00:00:00 2001 From: Chunyue Wang <80630709+openperf@users.noreply.github.com> Date: Tue, 5 May 2026 14:46:42 +0800 Subject: [PATCH] fix(session-file-repair): drop null-role message entries instead of preserving them (#77288) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit type:"message" entries with a null, missing, or blank role cannot be replayed to any provider — every router branches on message.role. The auto-repair pass was passing them through unchanged, relocating the corruption from the original file into the post-repair file (#77228 reported 935+ null-role entries surviving the pass). Add isStructurallyInvalidMessageEntry ahead of the existing rewrite predicates. Invalid message envelopes are counted as droppedLines and skipped; non-message envelope types (summary, custom, …) are unaffected. The .bak-* backup preserves the original bytes for postmortem before any entries are dropped. Tests: - pnpm test src/agents/session-file-repair.test.ts - pnpm exec oxfmt --check --threads=1 CHANGELOG.md src/agents/session-file-repair.ts src/agents/session-file-repair.test.ts - pnpm check:changed Refs #77228 --- CHANGELOG.md | 1 + src/agents/session-file-repair.test.ts | 119 +++++++++++++++++++++++++ src/agents/session-file-repair.ts | 34 +++++++ 3 files changed, 154 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60c5e1729c0..d71a0cd20db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -308,6 +308,7 @@ Docs: https://docs.openclaw.ai - Agents/bootstrap: honor `BOOTSTRAP.md` content injected by `agent:bootstrap` hooks when deciding whether bootstrap is pending, so hook-provided required setup instructions are included in the system prompt. (#77501) Thanks @ificator. 
- Agents/replay-history: drop trailing assistant turns whose content is empty or carries only the stream-error sentinel before sending the transcript to the provider, so prefill-strict providers (such as github-copilot/claude-opus-4.6) no longer reject the request with `400 The conversation must end with a user message` after a session whose last turn errored before producing content. Refs #77228. (#77287) Thanks @openperf. - Gateway/sessions: cache selected model override resolution while building session-list rows so `openclaw sessions` and Control UI session lists stay responsive on model-heavy stores. (#77650) Thanks @ragesaq. +- Agents/session-file-repair: drop `type: "message"` entries with a missing, `null`, or blank role during the on-disk repair pass so sessions that accumulated null-role JSONL corruption (such as the 935+ corrupt entries in #77228) get fully cleaned up rather than carried forward into the repaired file. Refs #77228. (#77288) Thanks @openperf. ## 2026.5.3-1 diff --git a/src/agents/session-file-repair.test.ts b/src/agents/session-file-repair.test.ts index 1efb6fc6c41..1f818b5180d 100644 --- a/src/agents/session-file-repair.test.ts +++ b/src/agents/session-file-repair.test.ts @@ -580,4 +580,123 @@ describe("repairSessionFileIfNeeded", () => { const after = await fs.readFile(file, "utf-8"); expect(after).toBe(original); }); + + it("drops type:message entries with null role instead of preserving them through repair (#77228)", async () => { + const { file } = await createTempSessionPath(); + const { header, message } = buildSessionHeaderAndMessage(); + + const nullRoleEntry = { + type: "message", + id: "corrupt-1", + parentId: null, + timestamp: new Date().toISOString(), + message: { role: null, content: "ignored" }, + }; + const missingRoleEntry = { + type: "message", + id: "corrupt-2", + parentId: null, + timestamp: new Date().toISOString(), + message: { content: "no role at all" }, + }; + const emptyRoleEntry = { + type: "message", + id: 
"corrupt-3", + parentId: null, + timestamp: new Date().toISOString(), + message: { role: " ", content: "blank role" }, + }; + + const content = [ + JSON.stringify(header), + JSON.stringify(message), + JSON.stringify(nullRoleEntry), + JSON.stringify(missingRoleEntry), + JSON.stringify(emptyRoleEntry), + ].join("\n"); + await fs.writeFile(file, `${content}\n`, "utf-8"); + + const result = await repairSessionFileIfNeeded({ sessionFile: file }); + + expect(result.repaired).toBe(true); + expect(result.droppedLines).toBe(3); + expect(result.backupPath).toBeTruthy(); + + const after = await fs.readFile(file, "utf-8"); + const lines = after.trimEnd().split("\n"); + expect(lines).toHaveLength(2); + expect(JSON.parse(lines[0])).toEqual(header); + expect(JSON.parse(lines[1])).toEqual(message); + expect(after).not.toContain('"role":null'); + }); + + it("drops a type:message entry whose message field is missing or non-object", async () => { + const { file } = await createTempSessionPath(); + const { header, message } = buildSessionHeaderAndMessage(); + + const missingMessage = { + type: "message", + id: "corrupt-4", + parentId: null, + timestamp: new Date().toISOString(), + }; + const stringMessage = { + type: "message", + id: "corrupt-5", + parentId: null, + timestamp: new Date().toISOString(), + message: "not an object", + }; + + const content = [ + JSON.stringify(header), + JSON.stringify(message), + JSON.stringify(missingMessage), + JSON.stringify(stringMessage), + ].join("\n"); + await fs.writeFile(file, `${content}\n`, "utf-8"); + + const result = await repairSessionFileIfNeeded({ sessionFile: file }); + + expect(result.repaired).toBe(true); + expect(result.droppedLines).toBe(2); + + const after = await fs.readFile(file, "utf-8"); + const lines = after.trimEnd().split("\n"); + expect(lines).toHaveLength(2); + }); + + it("preserves non-`message` envelope types (e.g. 
compactionSummary, custom) without role inspection", async () => { + const { file } = await createTempSessionPath(); + const { header, message } = buildSessionHeaderAndMessage(); + + const summary = { + type: "summary", + id: "summary-1", + timestamp: new Date().toISOString(), + summary: "opaque summary blob", + }; + const custom = { + type: "custom", + id: "custom-1", + customType: "model-snapshot", + timestamp: new Date().toISOString(), + data: { provider: "openai", modelApi: "openai-responses", modelId: "gpt-5" }, + }; + + const content = [ + JSON.stringify(header), + JSON.stringify(message), + JSON.stringify(summary), + JSON.stringify(custom), + ].join("\n"); + await fs.writeFile(file, `${content}\n`, "utf-8"); + + const result = await repairSessionFileIfNeeded({ sessionFile: file }); + + expect(result.repaired).toBe(false); + expect(result.droppedLines).toBe(0); + const after = await fs.readFile(file, "utf-8"); + expect(after).toBe(`${content}\n`); + }); }); diff --git a/src/agents/session-file-repair.ts b/src/agents/session-file-repair.ts index 106ab06fb64..bcb2267964d 100644 --- a/src/agents/session-file-repair.ts +++ b/src/agents/session-file-repair.ts @@ -33,6 +33,31 @@ function isSessionHeader(entry: unknown): entry is { type: string; id: string } return record.type === "session" && typeof record.id === "string" && record.id.length > 0; } +/** + * Detect a `type: "message"` entry that cannot be replayed: its `message` is + * missing or not an object, or its `message.role` is missing, `null`, or not a + * non-blank string (whitespace-only roles count as blank). Such entries show up + * in the wild as "null role" JSONL corruption (e.g. #77228 reported transcripts + * with 935+ null-role entries after an earlier failure). Every provider router + * branches on `message.role`, so preserving them through repair just relocates + * the corruption into the post-repair file. 
Returns `false` for non-objects and + * for every other envelope type, so only `message` entries are ever dropped.
+ */ +function isStructurallyInvalidMessageEntry(entry: unknown): boolean { + if (!entry || typeof entry !== "object") { + return false; + } + const record = entry as { type?: unknown; message?: unknown }; + if (record.type !== "message") { + return false; + } + if (!record.message || typeof record.message !== "object") { + return true; + } + const role = (record.message as { role?: unknown }).role; + return typeof role !== "string" || role.trim().length === 0; +} + function isAssistantEntryWithEmptyContent(entry: unknown): entry is SessionMessageEntry { if (!entry || typeof entry !== "object") { return false; @@ -193,6 +218,15 @@ export async function repairSessionFileIfNeeded(params: { } try { const entry: unknown = JSON.parse(line); + if (isStructurallyInvalidMessageEntry(entry)) { + // Drop "null role" / missing-role message entries the same way we + // drop unparseable JSONL: they cannot be replayed to any provider + // and preserving them through repair just relocates the corruption + // into the post-repair file (#77228: 935+ null-role entries + // surviving the auto-repair pass). + droppedLines += 1; + continue; + } if (isAssistantEntryWithEmptyContent(entry)) { entries.push(rewriteAssistantEntryWithEmptyContent(entry)); rewrittenAssistantMessages += 1;