fix(session-file-repair): drop null-role message entries instead of preserving them (#77288)

type:"message" entries with a null, missing, or blank role cannot be
replayed to any provider — every router branches on message.role. The
auto-repair pass was passing them through unchanged, relocating the
corruption from the original file into the post-repair file (#77228
reported 935+ null-role entries surviving the pass).

Add isStructurallyInvalidMessageEntry ahead of the existing rewrite
predicates. Invalid message envelopes are counted as droppedLines and
skipped; non-message envelope types (summary, custom, …) are unaffected.
The .bak-* backup preserves the original bytes for postmortem before any
entries are dropped.

Tests:
- pnpm test src/agents/session-file-repair.test.ts
- pnpm exec oxfmt --check --threads=1 CHANGELOG.md src/agents/session-file-repair.ts src/agents/session-file-repair.test.ts
- pnpm check:changed

Refs #77228
This commit is contained in:
Chunyue Wang
2026-05-05 14:46:42 +08:00
committed by GitHub
parent ea791b3792
commit 043cb32aab
3 changed files with 154 additions and 0 deletions

View File

@@ -308,6 +308,7 @@ Docs: https://docs.openclaw.ai
- Agents/bootstrap: honor `BOOTSTRAP.md` content injected by `agent:bootstrap` hooks when deciding whether bootstrap is pending, so hook-provided required setup instructions are included in the system prompt. (#77501) Thanks @ificator.
- Agents/replay-history: drop trailing assistant turns whose content is empty or carries only the stream-error sentinel before sending the transcript to the provider, so prefill-strict providers (such as github-copilot/claude-opus-4.6) no longer reject the request with `400 The conversation must end with a user message` after a session whose last turn errored before producing content. Refs #77228. (#77287) Thanks @openperf.
- Gateway/sessions: cache selected model override resolution while building session-list rows so `openclaw sessions` and Control UI session lists stay responsive on model-heavy stores. (#77650) Thanks @ragesaq.
- Agents/session-file-repair: drop `type: "message"` entries with a missing, `null`, or blank role during the on-disk repair pass so sessions that accumulated null-role JSONL corruption (such as the 935+ corrupt entries in #77228) get fully cleaned up rather than carried forward into the repaired file. Refs #77228. (#77288) Thanks @openperf.
## 2026.5.3-1

View File

@@ -580,4 +580,123 @@ describe("repairSessionFileIfNeeded", () => {
const after = await fs.readFile(file, "utf-8");
expect(after).toBe(original);
});
// Regression test for #77228: message envelopes whose role is null, absent,
// or whitespace-only must be removed by the repair pass, not carried
// forward into the rewritten session file.
it("drops type:message entries with null role instead of preserving them through repair (#77228)", async () => {
const { file } = await createTempSessionPath();
// presumably a valid session header plus one well-formed message entry —
// confirm against buildSessionHeaderAndMessage.
const { header, message } = buildSessionHeaderAndMessage();
// Corrupt variant 1: role is explicitly null.
const nullRoleEntry = {
type: "message",
id: "corrupt-1",
parentId: null,
timestamp: new Date().toISOString(),
message: { role: null, content: "ignored" },
};
// Corrupt variant 2: the role key is absent altogether.
const missingRoleEntry = {
type: "message",
id: "corrupt-2",
parentId: null,
timestamp: new Date().toISOString(),
message: { content: "no role at all" },
};
// Corrupt variant 3: role is a string, but only whitespace.
const emptyRoleEntry = {
type: "message",
id: "corrupt-3",
parentId: null,
timestamp: new Date().toISOString(),
message: { role: " ", content: "blank role" },
};
// One JSON document per line; the two valid entries come first.
const content = [
JSON.stringify(header),
JSON.stringify(message),
JSON.stringify(nullRoleEntry),
JSON.stringify(missingRoleEntry),
JSON.stringify(emptyRoleEntry),
].join("\n");
await fs.writeFile(file, `${content}\n`, "utf-8");
const result = await repairSessionFileIfNeeded({ sessionFile: file });
// All three corrupt envelopes are reported as dropped lines, and a backup
// path is returned alongside the repaired file.
expect(result.repaired).toBe(true);
expect(result.droppedLines).toBe(3);
expect(result.backupPath).toBeTruthy();
const after = await fs.readFile(file, "utf-8");
const lines = after.trimEnd().split("\n");
// Only the header and the valid message remain, in their original order.
expect(lines).toHaveLength(2);
expect(JSON.parse(lines[0])).toEqual(header);
expect(JSON.parse(lines[1])).toEqual(message);
// Extra guard: no serialized null role anywhere in the rewritten file.
expect(after).not.toContain('"role":null');
});
// A `type: "message"` envelope with no `message` payload, or with a payload
// that is not an object, is expected to be dropped by the repair pass.
it("drops a type:message entry whose message field is missing or non-object", async () => {
const { file } = await createTempSessionPath();
const { header, message } = buildSessionHeaderAndMessage();
// Envelope with no `message` key at all.
const missingMessage = {
type: "message",
id: "corrupt-4",
parentId: null,
timestamp: new Date().toISOString(),
};
// Envelope whose `message` is a plain string rather than an object.
const stringMessage = {
type: "message",
id: "corrupt-5",
parentId: null,
timestamp: new Date().toISOString(),
message: "not an object",
};
const content = [
JSON.stringify(header),
JSON.stringify(message),
JSON.stringify(missingMessage),
JSON.stringify(stringMessage),
].join("\n");
await fs.writeFile(file, `${content}\n`, "utf-8");
const result = await repairSessionFileIfNeeded({ sessionFile: file });
// Both malformed envelopes count toward droppedLines.
expect(result.repaired).toBe(true);
expect(result.droppedLines).toBe(2);
const after = await fs.readFile(file, "utf-8");
const lines = after.trimEnd().split("\n");
// Two lines survive; given the input order these should be the header and
// the valid message, though only the count is asserted here.
expect(lines).toHaveLength(2);
});
// Envelopes whose `type` is not "message" carry no role, so the role check
// must not touch them: the file is expected to come back byte-for-byte
// unchanged.
// NOTE(review): the title mentions `compactionSummary`, but the fixture below
// uses `type: "summary"` — confirm which envelope name the session format
// actually uses.
it("preserves non-`message` envelope types (e.g. compactionSummary, custom) without role inspection", async () => {
const { file } = await createTempSessionPath();
const { header, message } = buildSessionHeaderAndMessage();
// Non-message envelope with no `message` field.
const summary = {
type: "summary",
id: "summary-1",
timestamp: new Date().toISOString(),
summary: "opaque summary blob",
};
// Non-message envelope carrying an arbitrary `data` payload.
const custom = {
type: "custom",
id: "custom-1",
customType: "model-snapshot",
timestamp: new Date().toISOString(),
data: { provider: "openai", modelApi: "openai-responses", modelId: "gpt-5" },
};
const content = [
JSON.stringify(header),
JSON.stringify(message),
JSON.stringify(summary),
JSON.stringify(custom),
].join("\n");
await fs.writeFile(file, `${content}\n`, "utf-8");
const result = await repairSessionFileIfNeeded({ sessionFile: file });
// Nothing is dropped and the result reports that no repair happened.
expect(result.repaired).toBe(false);
expect(result.droppedLines).toBe(0);
const after = await fs.readFile(file, "utf-8");
// The on-disk content is identical to what was written.
expect(after).toBe(`${content}\n`);
});
});

View File

@@ -33,6 +33,31 @@ function isSessionHeader(entry: unknown): entry is { type: string; id: string }
return record.type === "session" && typeof record.id === "string" && record.id.length > 0;
}
/**
 * Report whether `entry` is a `type: "message"` envelope that has no usable
 * role.
 *
 * An envelope is considered structurally invalid when its `type` is
 * `"message"` and either:
 *   - its `message` payload is missing, `null`, or not an object, or
 *   - `message.role` is not a string, or is a string that is empty once
 *     trimmed.
 *
 * This is the "null role" JSONL corruption reported in #77228. Every
 * provider router branches on `message.role`, so such entries can never be
 * replayed; keeping them would only move the corruption from the original
 * file into the repaired one. Callers should treat a `true` result like an
 * unparseable line and drop the entry.
 *
 * Values that are not objects, and envelopes of any other `type`, are never
 * reported as invalid by this check.
 *
 * @param entry - A parsed JSONL line of unknown shape.
 * @returns `true` when the entry is a message envelope without a usable role.
 */
function isStructurallyInvalidMessageEntry(entry: unknown): boolean {
  // Only object values can be envelopes at all.
  if (typeof entry !== "object" || entry === null) {
    return false;
  }
  const envelope = entry as { type?: unknown; message?: unknown };
  // Other envelope types have no role to validate.
  if (envelope.type !== "message") {
    return false;
  }
  const payload = envelope.message;
  // A message envelope without an object payload has nowhere to hold a role.
  if (typeof payload !== "object" || payload === null) {
    return true;
  }
  const { role } = payload as { role?: unknown };
  const hasUsableRole = typeof role === "string" && role.trim() !== "";
  return !hasUsableRole;
}
function isAssistantEntryWithEmptyContent(entry: unknown): entry is SessionMessageEntry {
if (!entry || typeof entry !== "object") {
return false;
@@ -193,6 +218,15 @@ export async function repairSessionFileIfNeeded(params: {
}
try {
const entry: unknown = JSON.parse(line);
if (isStructurallyInvalidMessageEntry(entry)) {
// Drop "null role" / missing-role message entries the same way we
// drop unparseable JSONL: they cannot be replayed to any provider
// and preserving them through repair just relocates the corruption
// into the post-repair file (#77228: 935+ null-role entries
// surviving the auto-repair pass).
droppedLines += 1;
continue;
}
if (isAssistantEntryWithEmptyContent(entry)) {
entries.push(rewriteAssistantEntryWithEmptyContent(entry));
rewrittenAssistantMessages += 1;