// Patch summary (review notes; text before the first `diff --git` header is not part of any hunk).
//
// Files touched: session-files.ts and session-files.test.ts, in both
// packages/memory-host-sdk/src/host/ and src/memory-host-sdk/host/. The same change is applied to each copy.
//
// What changes in session-files.ts:
//   * New private helper collectRawSessionText(content): returns a string `content` unchanged, returns null
//     when `content` is neither a string nor an array, and otherwise joins with "\n" the `text` of every
//     array element that is a non-null object with type === "text" and a string `text`. It returns null
//     when no element qualifies.
//   * extractSessionText(content, role) now calls stripInboundMetadataForUserRole once on that joined text,
//     then normalizeSessionText, and returns null when the normalized result is empty.
//   * The removed code stripped and normalized each text block separately, dropped blocks that normalized
//     to an empty string, and joined the survivors with a single space.
//
// What the new tests pin: a user message whose content is eleven text blocks (two fenced "untrusted
// metadata" sections, blank blocks, then "Actual user text") must yield entry.content === "User: Actual user text".
//
// NOTE(review): the body of normalizeSessionText is not visible in these hunks (only its trailing `.trim()`).
// Multi-block output now depends on how it treats the "\n" separators — confirm it still renders
// multi-block assistant messages the way the old " " join did.
// NOTE(review): the context line `buildSessionEntry(absPath: string): Promise {` appears to have lost its
// type argument, and the patch's line breaks are collapsed — presumably an extraction artifact; verify
// against the original patch before applying.
diff --git a/packages/memory-host-sdk/src/host/session-files.test.ts b/packages/memory-host-sdk/src/host/session-files.test.ts index 476aa35644b..7ad7ee8c32a 100644 --- a/packages/memory-host-sdk/src/host/session-files.test.ts +++ b/packages/memory-host-sdk/src/host/session-files.test.ts @@ -120,4 +120,34 @@ describe("buildSessionEntry", () => { expect(entry).not.toBeNull(); expect(entry!.lineMap).toEqual([3, 5]); }); + + it("strips inbound metadata when a user envelope is split across text blocks", async () => { + const jsonlLines = [ + JSON.stringify({ + type: "message", + message: { + role: "user", + content: [ + { type: "text", text: "Conversation info (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"message_id":"msg-100","chat_id":"-100123"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Sender (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"label":"Chris","id":"42"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Actual user text" }, + ], + }, + }), + ]; + const filePath = path.join(tmpDir, "enveloped-session-array.jsonl"); + await fs.writeFile(filePath, jsonlLines.join("\n")); + + const entry = await buildSessionEntry(filePath); + expect(entry).not.toBeNull(); + expect(entry!.content).toBe("User: Actual user text"); + }); }); diff --git a/packages/memory-host-sdk/src/host/session-files.ts b/packages/memory-host-sdk/src/host/session-files.ts index 8d19279180f..d2a1116189e 100644 --- a/packages/memory-host-sdk/src/host/session-files.ts +++ b/packages/memory-host-sdk/src/host/session-files.ts @@ -69,6 +69,26 @@ function normalizeSessionText(value: string): string { .trim(); } +function collectRawSessionText(content: unknown): string | null { + if (typeof content === "string") { + return content; + } + if (!Array.isArray(content)) { + return null; + } + const parts: string[] = []; + for (const 
block of content) { + if (!block || typeof block !== "object") { + continue; + } + const record = block as { type?: unknown; text?: unknown }; + if (record.type === "text" && typeof record.text === "string") { + parts.push(record.text); + } + } + return parts.length > 0 ? parts.join("\n") : null; +} + /** * Strip OpenClaw-injected inbound metadata envelopes from a raw text block * on user-role messages before normalization. See the authoritative @@ -86,33 +106,13 @@ export function extractSessionText( content: unknown, role: "user" | "assistant" = "assistant", ): string | null { - if (typeof content === "string") { - const stripped = stripInboundMetadataForUserRole(content, role); - const normalized = normalizeSessionText(stripped); - return normalized ? normalized : null; - } - if (!Array.isArray(content)) { + const rawText = collectRawSessionText(content); + if (rawText === null) { return null; } - const parts: string[] = []; - for (const block of content) { - if (!block || typeof block !== "object") { - continue; - } - const record = block as { type?: unknown; text?: unknown }; - if (record.type !== "text" || typeof record.text !== "string") { - continue; - } - const stripped = stripInboundMetadataForUserRole(record.text, role); - const normalized = normalizeSessionText(stripped); - if (normalized) { - parts.push(normalized); - } - } - if (parts.length === 0) { - return null; - } - return parts.join(" "); + const stripped = stripInboundMetadataForUserRole(rawText, role); + const normalized = normalizeSessionText(stripped); + return normalized ? 
normalized : null; } export async function buildSessionEntry(absPath: string): Promise { diff --git a/src/memory-host-sdk/host/session-files.test.ts b/src/memory-host-sdk/host/session-files.test.ts index 6df89081b99..47e6213ce31 100644 --- a/src/memory-host-sdk/host/session-files.test.ts +++ b/src/memory-host-sdk/host/session-files.test.ts @@ -197,6 +197,36 @@ describe("buildSessionEntry", () => { expect(contentLines[1]).toBe("Assistant: 好的,我来查一下"); }); + it("strips inbound metadata when a user envelope is split across text blocks", async () => { + const jsonlLines = [ + JSON.stringify({ + type: "message", + message: { + role: "user", + content: [ + { type: "text", text: "Conversation info (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"message_id":"msg-100","chat_id":"-100123"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Sender (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"label":"Chris","id":"42"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Actual user text" }, + ], + }, + }), + ]; + const filePath = path.join(tmpDir, "enveloped-session-array.jsonl"); + await fs.writeFile(filePath, jsonlLines.join("\n")); + + const entry = await buildSessionEntry(filePath); + expect(entry).not.toBeNull(); + expect(entry!.content).toBe("User: Actual user text"); + }); + it("preserves assistant messages that happen to contain sentinel-like text", async () => { // Assistant role must NOT be stripped — only user messages carry inbound // envelopes, and assistants may legitimately discuss metadata formats. 
diff --git a/src/memory-host-sdk/host/session-files.ts b/src/memory-host-sdk/host/session-files.ts index 3263d8d0890..bc30386061b 100644 --- a/src/memory-host-sdk/host/session-files.ts +++ b/src/memory-host-sdk/host/session-files.ts @@ -183,6 +183,26 @@ function normalizeSessionText(value: string): string { .trim(); } +function collectRawSessionText(content: unknown): string | null { + if (typeof content === "string") { + return content; + } + if (!Array.isArray(content)) { + return null; + } + const parts: string[] = []; + for (const block of content) { + if (!block || typeof block !== "object") { + continue; + } + const record = block as { type?: unknown; text?: unknown }; + if (record.type === "text" && typeof record.text === "string") { + parts.push(record.text); + } + } + return parts.length > 0 ? parts.join("\n") : null; +} + /** * Strip OpenClaw-injected inbound metadata envelopes from a raw text block. * @@ -207,33 +227,13 @@ export function extractSessionText( content: unknown, role: "user" | "assistant" = "assistant", ): string | null { - if (typeof content === "string") { - const stripped = stripInboundMetadataForUserRole(content, role); - const normalized = normalizeSessionText(stripped); - return normalized ? 
normalized : null; - } - if (!Array.isArray(content)) { + const rawText = collectRawSessionText(content); + if (rawText === null) { return null; } - const parts: string[] = []; - for (const block of content) { - if (!block || typeof block !== "object") { - continue; - } - const record = block as { type?: unknown; text?: unknown }; - if (record.type !== "text" || typeof record.text !== "string") { - continue; - } - const stripped = stripInboundMetadataForUserRole(record.text, role); - const normalized = normalizeSessionText(stripped); - if (normalized) { - parts.push(normalized); - } - } - if (parts.length === 0) { - return null; - } - return parts.join(" "); + const stripped = stripInboundMetadataForUserRole(rawText, role); + const normalized = normalizeSessionText(stripped); + return normalized ? normalized : null; } function parseSessionTimestampMs(