From 82e349a48ad9b672c18d0eec5057d51c8ceafbd8 Mon Sep 17 00:00:00 2001 From: zqchris Date: Fri, 17 Apr 2026 02:15:44 +0800 Subject: [PATCH] memory: strip inbound metadata envelopes from user messages in session corpus (#66548) Merged via squash. Prepared head SHA: 98562b2a84450b039d78034fcb10122edafc235f Co-authored-by: zqchris <4436110+zqchris@users.noreply.github.com> Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Reviewed-by: @jalehman --- CHANGELOG.md | 59 ++++++++++++ .../src/host/session-files.test.ts | 30 ++++++ .../memory-host-sdk/src/host/session-files.ts | 43 ++++++--- .../host/session-files.test.ts | 95 +++++++++++++++++++ src/memory-host-sdk/host/session-files.ts | 50 +++++++--- 5 files changed, 253 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38c725617e8..384885bf0f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai - BlueBubbles/inbound: restore inbound image attachment downloads on Node 22+ by stripping incompatible bundled-undici dispatchers from the non-SSRF fetch path, accept `updated-message` webhooks carrying attachments, use event-type-aware dedup keys so attachment follow-ups are not rejected as duplicates, and retry attachment fetch from the BB API when the initial webhook arrives with an empty array. (#64105, #61861, #65430, #67510) Thanks @omarshahine. - Agents/skills: sort prompt-facing `available_skills` entries by skill name after merging sources so `skills.load.extraDirs` order no longer changes prompt-cache prefixes. (#64198) Thanks @Bartok9. - Agents/OpenAI Responses: add `models.providers.*.models.*.compat.supportsPromptCacheKey` so OpenAI-compatible proxies that forward `prompt_cache_key` can keep prompt caching enabled while incompatible endpoints can still force stripping. (#67427) Thanks @damselem. 
+- Memory/dreaming: strip AI-facing inbound metadata envelopes from session-corpus user turns before normalization so REM topic extraction sees the user's actual message text, including array-shaped split envelopes. (#66548) Thanks @zqchris. ## 2026.4.15-beta.1 @@ -120,6 +121,64 @@ Docs: https://docs.openclaw.ai - Dreaming/memory-core: use the ingestion day, not the source file day, for daily recall dedupe so repeat sweeps of the same daily note can increment `dailyCount` across days instead of stalling at `1`. (#67091) Thanks @Bartok9. - Node-host/tools.exec: let approval binding distinguish known native binaries from mutable shell payload files, while still fail-closing unknown or racy file probes so absolute-path node-host commands like `/usr/bin/whoami` no longer get rejected as unsafe interpreter/runtime commands. (#66731) Thanks @tmimmanuel. +## 2026.4.15-beta.1 + +### Changes + +- Control UI/Overview: add a Model Auth status card showing OAuth token health and provider rate-limit pressure at a glance, with attention callouts when OAuth tokens are expiring or expired. Backed by a new `models.authStatus` gateway method that strips credentials and caches for 60s. (#66211) Thanks @omarshahine. +- Memory/LanceDB: add cloud storage support to `memory-lancedb` so durable memory indexes can run on remote object storage instead of local disk only. (#63502) Thanks @rugvedS07. +- GitHub Copilot/memory search: add a GitHub Copilot embedding provider for memory search, and expose a dedicated Copilot embedding host helper so plugins can reuse the transport while honoring remote overrides, token refresh, and safer payload validation. (#61718) Thanks @feiskyer and @vincentkoc. +- Agents/local models: add experimental `agents.defaults.experimental.localModelLean: true` to drop heavyweight default tools like `browser`, `cron`, and `message`, reducing prompt size for weaker local-model setups without changing the normal path. (#66495) Thanks @ImLukeF. 
+- Packaging/plugins: localize bundled plugin runtime deps to their owning extensions, trim the published docs payload, and tighten install/package-manager guardrails so published builds stay leaner and core stops carrying extension-owned runtime baggage. (#67099) Thanks @vincentkoc. +- QA/Matrix: split Matrix live QA into a source-linked `qa-matrix` runner and keep repo-private `qa-*` surfaces out of packaged and published builds. (#66723) Thanks @gumadeiras. +- Docs/showcase: add a scannable hero, complete section jump links, and a responsive video grid for community examples. (#48493) Thanks @jchopard69. + +### Fixes + +- Security/approvals: redact secrets in exec approval prompts so inline approval review can no longer leak credential material in rendered prompt content. (#61077, #64790) +- CLI/configure: re-read the persisted config hash after writes so config updates stop failing with stale-hash races. (#64188, #66528) +- CLI/update: prune stale packaged `dist` chunks after npm upgrades and keep downgrade/verify inventory checks compat-safe so global upgrades stop failing on stale chunk imports. (#66959) Thanks @obviyus. +- Onboarding/CLI: fix channel-selection crashes on globally installed CLI setups during onboarding. (#66736) +- Video generation/live tests: bound provider polling for live video smoke, default to the fast non-FAL text-to-video path, and use a one-second lobster prompt so release validation no longer waits indefinitely on slow provider queues. +- Memory-core/QMD `memory_get`: reject reads of arbitrary workspace markdown paths and only allow canonical memory files (`MEMORY.md`, `memory.md`, `DREAMS.md`, `dreams.md`, `memory/**`) plus exact paths of active indexed QMD workspace documents, so the QMD memory backend can no longer be used as a generic workspace-file read shim that bypasses `read` tool-policy denials. (#66026) Thanks @eleqtrizit. 
+- Cron/agents: forward embedded-run tool policy and internal event params into the attempt layer so `--tools` allowlists, cron-owned message-tool suppression, explicit message targeting, and command-path internal events all take effect at runtime again. (#62675) Thanks @hexsprite. +- Setup/providers: guard preferred-provider lookup during setup so malformed plugin metadata with a missing provider id no longer crashes the wizard with `Cannot read properties of undefined (reading 'trim')`. (#66649) Thanks @Tianworld. +- Matrix/security: normalize sandboxed profile avatar params, preserve `mxc://` avatar URLs, and surface gmail watcher stop failures during reload. (#64701) Thanks @slepybear. +- Telegram/documents: drop leaked binary caption bytes from inbound Telegram text handling so document uploads like `.mobi` or `.epub` no longer explode prompt token counts. (#66663) Thanks @joelnishanth. +- Gateway/auth: resolve the active gateway bearer per-request on the HTTP server and the HTTP upgrade handler via `getResolvedAuth()`, mirroring the WebSocket path, so a secret rotated through `secrets.reload` or config hot-reload stops authenticating on `/v1/*`, `/tools/invoke`, plugin HTTP routes, and the canvas upgrade path immediately instead of remaining valid on HTTP until gateway restart. (#66651) Thanks @mmaps. +- Agents/compaction: cap the compaction reserve-token floor to the model context window so small-context local models (e.g. Ollama with 16K tokens) no longer trigger context-overflow errors or infinite compaction loops on every prompt. (#65671) Thanks @openperf. +- Agents/OpenAI Responses: classify the exact `Unknown error (no error details in response)` transport failure as failover reason `unknown` so assistant/model fallback still runs for that no-details failure path. (#65254) Thanks @OpenCodeEngineer. 
+- Models/probe: surface invalid-model probe failures as `format` instead of `unknown` in `models list --probe`, and lock the invalid-model fallback path in with regression coverage. (#50028) Thanks @xiwuqi. +- Agents/failover: classify OpenAI-compatible `finish_reason: network_error` stream failures as timeout so model fallback retries continue instead of stopping with an unknown failover reason. (#61784) Thanks @lawrence3699. +- Onboarding/channels: normalize channel setup metadata before discovery and validation so malformed or mixed-shape channel plugin metadata no longer breaks setup and onboarding channel lists. (#66706) Thanks @darkamenosa. +- Slack/native commands: fix option menus for slash commands such as `/verbose` when Slack renders native buttons by giving each button a unique action ID while still routing them through the shared `openclaw_cmdarg*` listener. Thanks @Wangmerlyn. +- Feishu/webhook: harden the webhook transport and card-action replay guards to fail closed on missing `encryptKey` and blank callback tokens — refuse to start the webhook transport without an `encryptKey`, reject unsigned requests when no key is present instead of accepting them, and drop blank card-action tokens before the dedupe claim and dispatcher. Defense-in-depth over the already-closed monitor-account layer. (#66707) Thanks @eleqtrizit. +- Agents/workspace files: route `agents.files.get`, `agents.files.set`, and workspace listing through the shared `fs-safe` helpers (`openFileWithinRoot`/`readFileWithinRoot`/`writeFileWithinRoot`), reject symlink aliases for allowlisted agent files, and have `fs-safe` resolve opened-file real paths from the file descriptor before falling back to path-based `realpath` so a symlink swap between `open` and `realpath` can no longer redirect the validated path off the intended inode. (#66636) Thanks @eleqtrizit. 
+- Gateway/MCP loopback: switch the `/mcp` bearer comparison from plain `!==` to constant-time `safeEqualSecret` (matching the convention every other auth surface in the codebase uses), and reject non-loopback browser-origin requests via `checkBrowserOrigin` before the auth gate runs. Loopback origins (`127.0.0.1:*`, `localhost:*`, same-origin) still go through, including the `localhost`↔`127.0.0.1` host mismatch that browsers flag as `Sec-Fetch-Site: cross-site`. (#66665) Thanks @eleqtrizit. +- Auto-reply/billing: classify pure billing cooldown fallback summaries from structured fallback reasons so users see billing guidance instead of the generic failure reply. (#66363) Thanks @Rohan5commit. +- Agents/fallback: preserve the original prompt body on model fallback retries with session history so the retrying model keeps the active task instead of only seeing a generic continue message. (#66029) Thanks @WuKongAI-CMU. +- Reply/secrets: resolve active reply channel/account SecretRefs before reply-run message-action discovery so channel token SecretRefs (for example Discord) do not degrade into discovery-time unresolved-secret failures. (#66796) Thanks @joshavant. +- Agents/Anthropic: ignore non-positive Anthropic Messages token overrides and fail locally when no positive token budget remains, so invalid `max_tokens` values no longer reach the provider API. (#66664) Thanks @jalehman. +- Agents/context engines: preserve prompt-only token counts, not full request totals, when deferred maintenance reuses after-turn runtime context so background compaction bookkeeping matches the active prompt window. (#66820) Thanks @jalehman. +- BlueBubbles/inbound: add a persistent file-backed GUID dedupe so MessagePoller webhook replays after BB Server restart or reconnect no longer cause the agent to re-reply to already-handled messages. (#19176, #12053, #66816) Thanks @omarshahine. 
+- Secrets/plugins/status: align SecretRef inspect-vs-strict handling across plugin preload, read-only status/agents surfaces, and runtime auth paths so unresolved refs no longer crash read-only CLI flows while runtime-required non-env refs stay strict. (#66818) Thanks @joshavant. +- Memory/dreaming: stop ordinary transcripts that merely quote the dream-diary prompt from being classified as internal dreaming runs and silently dropped from session recall ingestion. (#66852) Thanks @gumadeiras. +- Telegram/documents: sanitize binary reply context and ZIP-like archive extraction so `.epub` and `.mobi` uploads can no longer leak raw binary into prompt context through reply metadata or archive-to-`text/plain` coercion. (#66877) Thanks @martinfrancois. +- Telegram/native commands: restore plugin-registry-backed auto defaults for native commands and native skills so Telegram slash commands keep registering when `commands.native` and `commands.nativeSkills` stay on `auto`. (#66843) Thanks @kashevk0. +- OpenRouter/Qwen3: parse `reasoning_details` stream deltas as thinking content without skipping same-chunk tool calls, so Qwen3 replies no longer fail empty on OpenRouter and mixed reasoning/tool-call chunks still execute normally. (#66905) Thanks @bladin. +- BlueBubbles/inbound: replay missed webhook messages after gateway restart via a persistent per-account cursor and `/api/v1/message/query?after=` pass, so messages delivered while the gateway was down no longer disappear. Uses the existing `processMessage` path and is deduped by #66816's inbound GUID cache. (#66857, #66721) Thanks @omarshahine. +- Telegram/native commands: keep Telegram command-sync cache process-local so gateway restarts re-register the menu instead of trusting stale on-disk sync state after Telegram cleared commands out-of-band. (#66730) Thanks @nightq. 
+- Audio/self-hosted STT: restore `models.providers.*.request.allowPrivateNetwork` for audio transcription so private or LAN speech-to-text endpoints stop tripping SSRF blocks after the v2026.4.14 regression. (#66692) Thanks @jhsmith409. +- Auto-reply/media: allow workspace-rooted absolute media paths in auto-reply send flows so valid local media references no longer fail path validation. (#66689) +- WhatsApp/Baileys media upload: harden encrypted upload handling so large outbound media sends avoid buffer spikes and reliability regressions. (#65966) Thanks @frankekn. +- QQBot/cron: guard against undefined `event.content` in `parseFaceTags` and `filterInternalMarkers` so cron-triggered agent turns with no content payload no longer crash with `TypeError: Cannot read properties of undefined (reading 'startsWith')`. (#66302) Thanks @xinmotlanthua. +- CLI/plugins: stop `--dangerously-force-unsafe-install` plugin installs from falling back to hook-pack installs after security scan failures, while still preserving non-security fallback behavior for real hook packs. (#58909) Thanks @hxy91819. +- Claude CLI/sessions: classify `No conversation found with session ID` as `session_expired` so expired CLI-backed conversations clear the stale binding and recover on the next turn. (#65028) Thanks @Ivan-Fn. +- Context Engine: gracefully fall back to the legacy engine when a third-party context engine plugin fails at resolution time (unregistered id, factory throw, or contract violation), preventing a full gateway outage on every channel. (#66930) Thanks @openperf. +- Control UI/chat: keep optimistic user message cards visible during active sends by deferring same-session history reloads until the active run ends, including aborted and errored runs. (#66997) Thanks @scotthuang and @vincentkoc. 
+- Media/Slack: allow host-local CSV and Markdown uploads only when the fallback buffer actually decodes as text, so real plain-text files work without letting opaque non-text blobs renamed to `.csv` or `.md` slip past the host-read guard. (#67047) Thanks @Unayung. +- Ollama/onboarding: split setup into `Cloud + Local`, `Cloud only`, and `Local only`, support direct `OLLAMA_API_KEY` cloud setup without a local daemon, and keep Ollama web search on the local-host path. (#67005) Thanks @obviyus. + ## 2026.4.14 ### Changes diff --git a/packages/memory-host-sdk/src/host/session-files.test.ts b/packages/memory-host-sdk/src/host/session-files.test.ts index 476aa35644b..7ad7ee8c32a 100644 --- a/packages/memory-host-sdk/src/host/session-files.test.ts +++ b/packages/memory-host-sdk/src/host/session-files.test.ts @@ -120,4 +120,34 @@ describe("buildSessionEntry", () => { expect(entry).not.toBeNull(); expect(entry!.lineMap).toEqual([3, 5]); }); + + it("strips inbound metadata when a user envelope is split across text blocks", async () => { + const jsonlLines = [ + JSON.stringify({ + type: "message", + message: { + role: "user", + content: [ + { type: "text", text: "Conversation info (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"message_id":"msg-100","chat_id":"-100123"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Sender (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"label":"Chris","id":"42"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Actual user text" }, + ], + }, + }), + ]; + const filePath = path.join(tmpDir, "enveloped-session-array.jsonl"); + await fs.writeFile(filePath, jsonlLines.join("\n")); + + const entry = await buildSessionEntry(filePath); + expect(entry).not.toBeNull(); + expect(entry!.content).toBe("User: Actual user text"); + }); }); diff --git 
a/packages/memory-host-sdk/src/host/session-files.ts b/packages/memory-host-sdk/src/host/session-files.ts index fc25a35a4d7..d2a1116189e 100644 --- a/packages/memory-host-sdk/src/host/session-files.ts +++ b/packages/memory-host-sdk/src/host/session-files.ts @@ -1,5 +1,6 @@ import fs from "node:fs/promises"; import path from "node:path"; +import { stripInboundMetadata } from "../../../../src/auto-reply/reply/strip-inbound-meta.js"; import { isUsageCountedSessionTranscriptFileName } from "../../../../src/config/sessions/artifacts.js"; import { resolveSessionTranscriptsDirForAgent } from "../../../../src/config/sessions/paths.js"; import { redactSensitiveText } from "../../../../src/logging/redact.js"; @@ -68,10 +69,9 @@ function normalizeSessionText(value: string): string { .trim(); } -export function extractSessionText(content: unknown): string | null { +function collectRawSessionText(content: unknown): string | null { if (typeof content === "string") { - const normalized = normalizeSessionText(content); - return normalized ? normalized : null; + return content; } if (!Array.isArray(content)) { return null; @@ -82,18 +82,37 @@ export function extractSessionText(content: unknown): string | null { continue; } const record = block as { type?: unknown; text?: unknown }; - if (record.type !== "text" || typeof record.text !== "string") { - continue; - } - const normalized = normalizeSessionText(record.text); - if (normalized) { - parts.push(normalized); + if (record.type === "text" && typeof record.text === "string") { + parts.push(record.text); } } - if (parts.length === 0) { + return parts.length > 0 ? parts.join("\n") : null; +} + +/** + * Strip OpenClaw-injected inbound metadata envelopes from a raw text block + * on user-role messages before normalization. See the authoritative + * implementation in `src/memory-host-sdk/host/session-files.ts` for the + * full rationale; duplicated here to keep this parallel copy bug-free. 
+ */ +function stripInboundMetadataForUserRole(text: string, role: "user" | "assistant"): string { + if (role !== "user") { + return text; + } + return stripInboundMetadata(text); +} + +export function extractSessionText( + content: unknown, + role: "user" | "assistant" = "assistant", +): string | null { + const rawText = collectRawSessionText(content); + if (rawText === null) { return null; } - return parts.join(" "); + const stripped = stripInboundMetadataForUserRole(rawText, role); + const normalized = normalizeSessionText(stripped); + return normalized ? normalized : null; } export async function buildSessionEntry(absPath: string): Promise { @@ -134,7 +153,7 @@ export async function buildSessionEntry(absPath: string): Promise { ]); }); + it("strips inbound metadata envelope from user messages before normalization", async () => { + // Real Telegram inbound envelope: Conversation info + Sender blocks prepended + // to the actual user text. Without stripping, the JSON envelope dominates + // the corpus entry and the user's real words get truncated by the + // SESSION_INGESTION_MAX_SNIPPET_CHARS cap downstream. 
+ // See: https://github.com/openclaw/openclaw/issues/63921 + const envelopedUserText = [ + "Conversation info (untrusted metadata):", + "```json", + '{"message_id":"msg-100","chat_id":"-100123","sender":"Chris"}', + "```", + "", + "Sender (untrusted metadata):", + "```json", + '{"label":"Chris","name":"Chris","id":"42"}', + "```", + "", + "帮我看看今天的 Oura 数据", + ].join("\n"); + + const jsonlLines = [ + JSON.stringify({ + type: "message", + message: { role: "user", content: envelopedUserText }, + }), + JSON.stringify({ + type: "message", + message: { role: "assistant", content: "好的,我来查一下" }, + }), + ]; + const filePath = path.join(tmpDir, "enveloped-session.jsonl"); + await fs.writeFile(filePath, jsonlLines.join("\n")); + + const entry = await buildSessionEntry(filePath); + expect(entry).not.toBeNull(); + + const contentLines = entry!.content.split("\n"); + expect(contentLines).toHaveLength(2); + // User line should contain ONLY the real user text, not the JSON envelope. + expect(contentLines[0]).toBe("User: 帮我看看今天的 Oura 数据"); + expect(contentLines[0]).not.toContain("untrusted metadata"); + expect(contentLines[0]).not.toContain("message_id"); + expect(contentLines[0]).not.toContain("```json"); + expect(contentLines[1]).toBe("Assistant: 好的,我来查一下"); + }); + + it("strips inbound metadata when a user envelope is split across text blocks", async () => { + const jsonlLines = [ + JSON.stringify({ + type: "message", + message: { + role: "user", + content: [ + { type: "text", text: "Conversation info (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"message_id":"msg-100","chat_id":"-100123"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Sender (untrusted metadata):" }, + { type: "text", text: "```json" }, + { type: "text", text: '{"label":"Chris","id":"42"}' }, + { type: "text", text: "```" }, + { type: "text", text: "" }, + { type: "text", text: "Actual user text" }, + ], + }, + }), + ]; + 
const filePath = path.join(tmpDir, "enveloped-session-array.jsonl"); + await fs.writeFile(filePath, jsonlLines.join("\n")); + + const entry = await buildSessionEntry(filePath); + expect(entry).not.toBeNull(); + expect(entry!.content).toBe("User: Actual user text"); + }); + + it("preserves assistant messages that happen to contain sentinel-like text", async () => { + // Assistant role must NOT be stripped — only user messages carry inbound + // envelopes, and assistants may legitimately discuss metadata formats. + const assistantText = + "The envelope format uses 'Conversation info (untrusted metadata):' as a sentinel"; + const jsonlLines = [ + JSON.stringify({ + type: "message", + message: { role: "assistant", content: assistantText }, + }), + ]; + const filePath = path.join(tmpDir, "assistant-sentinel.jsonl"); + await fs.writeFile(filePath, jsonlLines.join("\n")); + + const entry = await buildSessionEntry(filePath); + expect(entry).not.toBeNull(); + expect(entry!.content).toBe(`Assistant: ${assistantText}`); + }); + it("flags dreaming narrative transcripts from bootstrap metadata", async () => { const jsonlLines = [ JSON.stringify({ diff --git a/src/memory-host-sdk/host/session-files.ts b/src/memory-host-sdk/host/session-files.ts index 5865cf9bbb3..bc30386061b 100644 --- a/src/memory-host-sdk/host/session-files.ts +++ b/src/memory-host-sdk/host/session-files.ts @@ -1,5 +1,6 @@ import fs from "node:fs/promises"; import path from "node:path"; +import { stripInboundMetadata } from "../../auto-reply/reply/strip-inbound-meta.js"; import { isUsageCountedSessionTranscriptFileName } from "../../config/sessions/artifacts.js"; import { resolveSessionTranscriptsDirForAgent } from "../../config/sessions/paths.js"; import { loadSessionStore } from "../../config/sessions/store-load.js"; @@ -182,10 +183,9 @@ function normalizeSessionText(value: string): string { .trim(); } -export function extractSessionText(content: unknown): string | null { +function 
collectRawSessionText(content: unknown): string | null { if (typeof content === "string") { - const normalized = normalizeSessionText(content); - return normalized ? normalized : null; + return content; } if (!Array.isArray(content)) { return null; @@ -196,18 +196,44 @@ export function extractSessionText(content: unknown): string | null { continue; } const record = block as { type?: unknown; text?: unknown }; - if (record.type !== "text" || typeof record.text !== "string") { - continue; - } - const normalized = normalizeSessionText(record.text); - if (normalized) { - parts.push(normalized); + if (record.type === "text" && typeof record.text === "string") { + parts.push(record.text); } } - if (parts.length === 0) { + return parts.length > 0 ? parts.join("\n") : null; +} + +/** + * Strip OpenClaw-injected inbound metadata envelopes from a raw text block. + * + * User-role messages arriving from external channels (Telegram, Discord, + * Slack, …) are stored with a multi-line prefix containing Conversation info, + * Sender info, and other AI-facing metadata blocks. These envelopes must be + * removed BEFORE normalization, because `stripInboundMetadata` relies on + * newline structure and fenced `json` code fences to locate sentinels; once + * `normalizeSessionText` collapses newlines into spaces, stripping is + * impossible. + * + * See: https://github.com/openclaw/openclaw/issues/63921 + */ +function stripInboundMetadataForUserRole(text: string, role: "user" | "assistant"): string { + if (role !== "user") { + return text; + } + return stripInboundMetadata(text); +} + +export function extractSessionText( + content: unknown, + role: "user" | "assistant" = "assistant", +): string | null { + const rawText = collectRawSessionText(content); + if (rawText === null) { return null; } - return parts.join(" "); + const stripped = stripInboundMetadataForUserRole(rawText, role); + const normalized = normalizeSessionText(stripped); + return normalized ? 
normalized : null; } function parseSessionTimestampMs( @@ -275,7 +301,7 @@ export async function buildSessionEntry( if (message.role !== "user" && message.role !== "assistant") { continue; } - const text = extractSessionText(message.content); + const text = extractSessionText(message.content, message.role); if (!text) { continue; }