fix: canonicalize malformed assistant replay content

This commit is contained in:
Peter Steinberger
2026-03-23 17:36:45 -07:00
parent a5c35050f3
commit ce75f60ae9
3 changed files with 91 additions and 1 deletion

View File

@@ -308,6 +308,34 @@ describe("sanitizeSessionHistory", () => {
).toBe(false);
});
// Regression test: assistant history entries whose `content` is not an array
// must come out of sanitizeSessionHistory with array-based text content.
it("canonicalizes malformed assistant history content before replay sanitization", async () => {
setNonGoogleModelApi();
// One well-formed user message followed by two malformed assistant entries:
// a bare string and a plain object in place of the content array.
const messages = castAgentMessages([
{ role: "user", content: "Question" },
{ role: "assistant", content: "legacy-content" },
{ role: "assistant", content: { unexpected: true } },
]);
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-responses",
provider: "openai",
sessionManager: mockSessionManager,
sessionId: TEST_SESSION_ID,
});
// The user message is expected to be unchanged.
expect(result[0]).toEqual(messages[0]);
// String content is preserved as the text of a single text block.
expect(result[1]).toMatchObject({
role: "assistant",
content: [{ type: "text", text: "legacy-content" }],
});
// Content that is neither an array nor a string is replaced by an empty text block.
expect(result[2]).toMatchObject({
role: "assistant",
content: [{ type: "text", text: "" }],
});
});
it("annotates inter-session user messages before context sanitization", async () => {
setNonGoogleModelApi();

View File

@@ -60,6 +60,8 @@ const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([
]);
// Bracketed tag text for inter-session messages.
// NOTE(review): the consumer is outside this view — presumably buildInterSessionPrefix; confirm.
const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
/** The member(s) of the `AgentMessage` union whose `role` is `"assistant"`. */
type AssistantHistoryMessage = Extract<AgentMessage, { role: "assistant" }>;
/**
 * An assistant message as it may appear in stored history: every field of
 * `AssistantHistoryMessage` except `content`, which is re-declared as optional
 * and `unknown` so it has to be checked at runtime before it is used.
 */
type RawAssistantHistoryMessage = Omit<AssistantHistoryMessage, "content"> & { content?: unknown };
function buildInterSessionPrefix(message: AgentMessage): string {
const provenance = normalizeInputProvenance((message as { provenance?: unknown }).provenance);
@@ -140,6 +142,61 @@ function annotateInterSessionUserMessages(messages: AgentMessage[]): AgentMessag
return touched ? out : messages;
}
/**
 * Names the runtime shape of a raw assistant `content` value for diagnostics.
 *
 * `typeof` reports `"object"` for both arrays and `null`, so those two cases
 * get their own labels; every other value is labelled with its `typeof` tag.
 *
 * @param content - The untyped `content` value to classify.
 * @returns `"array"`, `"null"`, or the `typeof` result for `content`.
 */
function describeAssistantContentKind(content: unknown): string {
  if (content === null) {
    return "null";
  }
  return Array.isArray(content) ? "array" : typeof content;
}
/**
 * Rewrites assistant messages whose `content` is not an array into the
 * array-of-text-blocks form, leaving every other message untouched.
 *
 * Session transcripts and custom stream boundaries have historically leaked
 * malformed assistant payloads; repairing them here means Pi replay only sees
 * the canonical array-based assistant content contract.
 *
 * Repair rule: string content becomes the text of a single text block; any
 * other non-array content (including a missing field) becomes a single empty
 * text block. All other fields of the message are carried over.
 *
 * @param params.messages - History to inspect. The array is not mutated.
 * @param params.sessionId - Included in the warning for correlation.
 * @returns The original `messages` array (same reference) when nothing needed
 *   repair; otherwise a new array with repaired assistant entries. When
 *   repairs happen, a single warning is logged with the repair count and the
 *   distinct kinds of malformed content encountered.
 */
function canonicalizeAssistantHistoryMessages(params: {
  messages: AgentMessage[];
  sessionId: string;
}): AgentMessage[] {
  const rebuilt: AgentMessage[] = [];
  const malformedKinds = new Set<string>();
  let repairs = 0;

  for (const entry of params.messages) {
    // Only assistant-role objects are candidates; everything else passes through.
    if (!entry || typeof entry !== "object" || entry.role !== "assistant") {
      rebuilt.push(entry);
      continue;
    }

    const raw = entry as RawAssistantHistoryMessage;
    const rawContent = raw.content;

    if (Array.isArray(rawContent)) {
      // Already in canonical shape.
      rebuilt.push(entry);
    } else {
      // Keep string payloads as text; anything else cannot be represented and
      // collapses to an empty text block.
      const text = typeof rawContent === "string" ? rawContent : "";
      rebuilt.push({
        ...(raw as unknown as Record<string, unknown>),
        content: [{ type: "text", text }],
      } as AgentMessage);
      repairs += 1;
      malformedKinds.add(describeAssistantContentKind(rawContent));
    }
  }

  // Hand back the caller's array when nothing changed so identity checks still hold.
  if (repairs === 0) {
    return params.messages;
  }

  const kindList = Array.from(malformedKinds).join(",");
  log.warn(
    `sanitizeSessionHistory: canonicalized ${repairs} malformed assistant message(s) before replay ` +
      `session=${params.sessionId} contentKinds=${kindList}`,
  );
  return rebuilt;
}
function parseMessageTimestamp(value: unknown): number | null {
if (typeof value === "number" && Number.isFinite(value)) {
return value;
@@ -537,8 +594,12 @@ export async function sanitizeSessionHistory(params: {
modelId: params.modelId,
});
const withInterSessionMarkers = annotateInterSessionUserMessages(params.messages);
const canonicalizedAssistantHistory = canonicalizeAssistantHistoryMessages({
messages: withInterSessionMarkers,
sessionId: params.sessionId,
});
const sanitizedImages = await sanitizeSessionMessagesImages(
withInterSessionMarkers,
canonicalizedAssistantHistory,
"session:history",
{
sanitizeMode: policy.sanitizeMode,