mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 13:30:42 +00:00
memory: handle split inbound envelopes in session corpus
This commit is contained in:
@@ -120,4 +120,34 @@ describe("buildSessionEntry", () => {
|
||||
expect(entry).not.toBeNull();
|
||||
expect(entry!.lineMap).toEqual([3, 5]);
|
||||
});
|
||||
|
||||
it("strips inbound metadata when a user envelope is split across text blocks", async () => {
  // Every string below becomes its own `{ type: "text" }` content block, so the
  // two metadata sections (each a label line plus a fenced JSON payload) and the
  // real user text all arrive as separate blocks of a single user message.
  const blockTexts = [
    "Conversation info (untrusted metadata):",
    "```json",
    '{"message_id":"msg-100","chat_id":"-100123"}',
    "```",
    "",
    "Sender (untrusted metadata):",
    "```json",
    '{"label":"Chris","id":"42"}',
    "```",
    "",
    "Actual user text",
  ];
  const record = {
    type: "message",
    message: {
      role: "user",
      content: blockTexts.map((text) => ({ type: "text", text })),
    },
  };

  // The session file holds exactly one JSONL line.
  const sessionFile = path.join(tmpDir, "enveloped-session-array.jsonl");
  await fs.writeFile(sessionFile, JSON.stringify(record));

  const entry = await buildSessionEntry(sessionFile);

  expect(entry).not.toBeNull();
  // Only the trailing user text is expected to remain, behind the role label.
  expect(entry!.content).toBe("User: Actual user text");
});
|
||||
});
|
||||
|
||||
@@ -69,6 +69,26 @@ function normalizeSessionText(value: string): string {
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
 * Gather the raw, un-normalized text carried by a message `content` value.
 *
 * - A string is returned unchanged.
 * - An array is scanned in order for entries shaped like
 *   `{ type: "text", text: string }`; their `text` values are joined with "\n".
 *   Falsy values, non-objects and entries that are not text blocks are ignored.
 * - Any other value, or an array without a single text entry, yields `null`.
 *
 * Empty-string text entries are kept, so they show up as blank lines in the
 * joined result.
 *
 * @param content - Message content of unknown shape.
 * @returns The collected text, or `null` when there is none to collect.
 */
function collectRawSessionText(content: unknown): string | null {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return null;
  }

  const texts = content.flatMap((entry: unknown): string[] => {
    if (typeof entry !== "object" || entry === null) {
      return [];
    }
    const candidate = entry as { type?: unknown; text?: unknown };
    return candidate.type === "text" && typeof candidate.text === "string"
      ? [candidate.text]
      : [];
  });

  return texts.length === 0 ? null : texts.join("\n");
}
|
||||
|
||||
/**
|
||||
* Strip OpenClaw-injected inbound metadata envelopes from a raw text block
|
||||
* on user-role messages before normalization. See the authoritative
|
||||
@@ -86,33 +106,13 @@ export function extractSessionText(
|
||||
content: unknown,
|
||||
role: "user" | "assistant" = "assistant",
|
||||
): string | null {
|
||||
if (typeof content === "string") {
|
||||
const stripped = stripInboundMetadataForUserRole(content, role);
|
||||
const normalized = normalizeSessionText(stripped);
|
||||
return normalized ? normalized : null;
|
||||
}
|
||||
if (!Array.isArray(content)) {
|
||||
const rawText = collectRawSessionText(content);
|
||||
if (rawText === null) {
|
||||
return null;
|
||||
}
|
||||
const parts: string[] = [];
|
||||
for (const block of content) {
|
||||
if (!block || typeof block !== "object") {
|
||||
continue;
|
||||
}
|
||||
const record = block as { type?: unknown; text?: unknown };
|
||||
if (record.type !== "text" || typeof record.text !== "string") {
|
||||
continue;
|
||||
}
|
||||
const stripped = stripInboundMetadataForUserRole(record.text, role);
|
||||
const normalized = normalizeSessionText(stripped);
|
||||
if (normalized) {
|
||||
parts.push(normalized);
|
||||
}
|
||||
}
|
||||
if (parts.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return parts.join(" ");
|
||||
const stripped = stripInboundMetadataForUserRole(rawText, role);
|
||||
const normalized = normalizeSessionText(stripped);
|
||||
return normalized ? normalized : null;
|
||||
}
|
||||
|
||||
export async function buildSessionEntry(absPath: string): Promise<SessionFileEntry | null> {
|
||||
|
||||
@@ -197,6 +197,36 @@ describe("buildSessionEntry", () => {
|
||||
expect(contentLines[1]).toBe("Assistant: 好的,我来查一下");
|
||||
});
|
||||
|
||||
it("strips inbound metadata when a user envelope is split across text blocks", async () => {
  // Every string below becomes its own `{ type: "text" }` content block, so the
  // two metadata sections (each a label line plus a fenced JSON payload) and the
  // real user text all arrive as separate blocks of a single user message.
  const blockTexts = [
    "Conversation info (untrusted metadata):",
    "```json",
    '{"message_id":"msg-100","chat_id":"-100123"}',
    "```",
    "",
    "Sender (untrusted metadata):",
    "```json",
    '{"label":"Chris","id":"42"}',
    "```",
    "",
    "Actual user text",
  ];
  const record = {
    type: "message",
    message: {
      role: "user",
      content: blockTexts.map((text) => ({ type: "text", text })),
    },
  };

  // The session file holds exactly one JSONL line.
  const sessionFile = path.join(tmpDir, "enveloped-session-array.jsonl");
  await fs.writeFile(sessionFile, JSON.stringify(record));

  const entry = await buildSessionEntry(sessionFile);

  expect(entry).not.toBeNull();
  // Only the trailing user text is expected to remain, behind the role label.
  expect(entry!.content).toBe("User: Actual user text");
});
|
||||
|
||||
it("preserves assistant messages that happen to contain sentinel-like text", async () => {
|
||||
// Assistant role must NOT be stripped — only user messages carry inbound
|
||||
// envelopes, and assistants may legitimately discuss metadata formats.
|
||||
|
||||
@@ -183,6 +183,26 @@ function normalizeSessionText(value: string): string {
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
 * Gather the raw, un-normalized text carried by a message `content` value.
 *
 * - A string is returned unchanged.
 * - An array is scanned in order for entries shaped like
 *   `{ type: "text", text: string }`; their `text` values are joined with "\n".
 *   Falsy values, non-objects and entries that are not text blocks are ignored.
 * - Any other value, or an array without a single text entry, yields `null`.
 *
 * Empty-string text entries are kept, so they show up as blank lines in the
 * joined result.
 *
 * @param content - Message content of unknown shape.
 * @returns The collected text, or `null` when there is none to collect.
 */
function collectRawSessionText(content: unknown): string | null {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return null;
  }

  const texts = content.flatMap((entry: unknown): string[] => {
    if (typeof entry !== "object" || entry === null) {
      return [];
    }
    const candidate = entry as { type?: unknown; text?: unknown };
    return candidate.type === "text" && typeof candidate.text === "string"
      ? [candidate.text]
      : [];
  });

  return texts.length === 0 ? null : texts.join("\n");
}
|
||||
|
||||
/**
|
||||
* Strip OpenClaw-injected inbound metadata envelopes from a raw text block.
|
||||
*
|
||||
@@ -207,33 +227,13 @@ export function extractSessionText(
|
||||
content: unknown,
|
||||
role: "user" | "assistant" = "assistant",
|
||||
): string | null {
|
||||
if (typeof content === "string") {
|
||||
const stripped = stripInboundMetadataForUserRole(content, role);
|
||||
const normalized = normalizeSessionText(stripped);
|
||||
return normalized ? normalized : null;
|
||||
}
|
||||
if (!Array.isArray(content)) {
|
||||
const rawText = collectRawSessionText(content);
|
||||
if (rawText === null) {
|
||||
return null;
|
||||
}
|
||||
const parts: string[] = [];
|
||||
for (const block of content) {
|
||||
if (!block || typeof block !== "object") {
|
||||
continue;
|
||||
}
|
||||
const record = block as { type?: unknown; text?: unknown };
|
||||
if (record.type !== "text" || typeof record.text !== "string") {
|
||||
continue;
|
||||
}
|
||||
const stripped = stripInboundMetadataForUserRole(record.text, role);
|
||||
const normalized = normalizeSessionText(stripped);
|
||||
if (normalized) {
|
||||
parts.push(normalized);
|
||||
}
|
||||
}
|
||||
if (parts.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return parts.join(" ");
|
||||
const stripped = stripInboundMetadataForUserRole(rawText, role);
|
||||
const normalized = normalizeSessionText(stripped);
|
||||
return normalized ? normalized : null;
|
||||
}
|
||||
|
||||
function parseSessionTimestampMs(
|
||||
|
||||
Reference in New Issue
Block a user