diff --git a/src/sessions/user-turn-transcript.test.ts b/src/sessions/user-turn-transcript.test.ts index dd2b4022356..42ce88b85a6 100644 --- a/src/sessions/user-turn-transcript.test.ts +++ b/src/sessions/user-turn-transcript.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it } from "vitest"; -import { buildPersistedUserTurnMediaFields } from "./user-turn-transcript.js"; +import { + buildPersistedUserTurnMediaFields, + buildPersistedUserTurnMessage, +} from "./user-turn-transcript.js"; describe("user turn transcript persistence", () => { describe("buildPersistedUserTurnMediaFields", () => { @@ -66,4 +69,84 @@ describe("user turn transcript persistence", () => { }); }); }); + + describe("buildPersistedUserTurnMessage", () => { + it("builds a plain user transcript message for text-only turns", () => { + expect( + buildPersistedUserTurnMessage({ + text: "hello", + timestamp: 123, + idempotencyKey: "turn-1", + }), + ).toEqual({ + role: "user", + content: "hello", + timestamp: 123, + idempotencyKey: "turn-1", + }); + }); + + it("adds structured media fields to the user transcript message", () => { + expect( + buildPersistedUserTurnMessage({ + text: "What is in this image?", + media: [{ path: "/tmp/a.png", contentType: "image/png" }], + timestamp: 123, + }), + ).toEqual({ + role: "user", + content: "What is in this image?", + timestamp: 123, + MediaPath: "/tmp/a.png", + MediaPaths: ["/tmp/a.png"], + MediaType: "image/png", + MediaTypes: ["image/png"], + }); + }); + + it("does not infer media from marker-like user text", () => { + expect( + buildPersistedUserTurnMessage({ + text: "[media attached: media://inbound/photo.png]\nWhat is this?", + timestamp: 123, + }), + ).toEqual({ + role: "user", + content: "[media attached: media://inbound/photo.png]\nWhat is this?", + timestamp: 123, + }); + }); + + it("uses an explicit media-only display text when provided", () => { + expect( + buildPersistedUserTurnMessage({ + text: "", + mediaOnlyText: "[User sent media]", + media: [{ 
path: "/tmp/a.png", contentType: "image/png" }], + }), + ).toEqual({ + role: "user", + content: "[User sent media]", + MediaPath: "/tmp/a.png", + MediaPaths: ["/tmp/a.png"], + MediaType: "image/png", + MediaTypes: ["image/png"], + }); + }); + + it("keeps media-only transcript content empty by default", () => { + expect( + buildPersistedUserTurnMessage({ + media: [{ path: "/tmp/a.png", contentType: "image/png" }], + }), + ).toEqual({ + role: "user", + content: "", + MediaPath: "/tmp/a.png", + MediaPaths: ["/tmp/a.png"], + MediaType: "image/png", + MediaTypes: ["image/png"], + }); + }); + }); }); diff --git a/src/sessions/user-turn-transcript.ts b/src/sessions/user-turn-transcript.ts index f53fbfa2bda..1e4f1ec23e2 100644 --- a/src/sessions/user-turn-transcript.ts +++ b/src/sessions/user-turn-transcript.ts @@ -1,3 +1,5 @@ +import type { AgentMessage } from "@earendil-works/pi-agent-core"; + export type PersistedUserTurnMediaInput = { path?: string | null; url?: string | null; @@ -12,11 +14,25 @@ export type PersistedUserTurnMediaFields = { MediaTypes?: string[]; }; +export type PersistedUserTurnMessage = Extract<AgentMessage, { role: "user" }>; + +export type BuildPersistedUserTurnMessageParams = { + text?: string | null; + media?: readonly PersistedUserTurnMediaInput[] | null; + timestamp?: number; + idempotencyKey?: string; + mediaOnlyText?: string; +}; + function normalizeOptionalText(value: string | null | undefined): string | undefined { const normalized = value?.trim(); return normalized ? normalized : undefined; } +function normalizeTranscriptText(value: string | null | undefined): string { + return value ?? ""; +} + function mediaTypeForTranscript(media: PersistedUserTurnMediaInput): string { return ( normalizeOptionalText(media.contentType) ?? 
/**
 * Assemble the user-role transcript entry persisted for a single turn.
 *
 * The entry's `content` is the caller's text whenever that text is non-empty.
 * When the text is empty, `mediaOnlyText` (or `""` if it was not supplied) is
 * used, but only if the structured media fields carry a `MediaPath`; without
 * one the content is `""`.
 *
 * `timestamp` is copied only when it is not `undefined`, and `idempotencyKey`
 * only when it is truthy. The structured media fields are spread last.
 *
 * @param params - Text, media inputs, and optional metadata for the turn.
 * @returns The assembled message, asserted to `PersistedUserTurnMessage`.
 */
export function buildPersistedUserTurnMessage(
  params: BuildPersistedUserTurnMessageParams,
): PersistedUserTurnMessage {
  const media = buildPersistedUserTurnMediaFields(params.media);
  const rawText = normalizeTranscriptText(params.text);

  // Only an empty text falls back to the media-only placeholder.
  let content: string = rawText;
  if (!rawText) {
    content = media.MediaPath ? (params.mediaOnlyText ?? "") : "";
  }

  // Optional metadata is added in the same key order as the final message.
  const metadata: { timestamp?: number; idempotencyKey?: string } = {};
  if (params.timestamp !== undefined) {
    metadata.timestamp = params.timestamp;
  }
  if (params.idempotencyKey) {
    metadata.idempotencyKey = params.idempotencyKey;
  }

  return {
    role: "user",
    content,
    ...metadata,
    ...media,
  } as PersistedUserTurnMessage;
}