diff --git a/CHANGELOG.md b/CHANGELOG.md index 75a8c6579ce..4678b085f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai - Plugins/CLI: refresh the persisted registry after managed plugin files are removed so ClawHub uninstall cannot leave stale `plugins list` entries. Thanks @codex. - Plugins/CLI: make plugin install and uninstall config writes conflict-aware, clear stale denylist entries on explicit reinstall/removal, and delete managed plugin files only after config/index commit succeeds. Thanks @codex. - Plugins: fail `plugins update` when tracked plugin or hook updates error, keep bundled runtime-dependency repair behind restrictive allowlists, and reject package installs with unloadable extension entries. Thanks @codex. +- WebChat/Control UI: support non-video file attachments in chat uploads while preserving the existing image attachment path and MIME-sniff fallback for generic image uploads. (#70947) Thanks @IAMSamuelRodda. - Gateway/chat: keep duplicate attachment-backed `chat.send` retries with the same idempotency key on the documented in-flight path so aborts still target the real active run. Fixes #70139. Thanks @Feelw00. - Plugins: share package entrypoint resolution between install and discovery, reject mismatched `runtimeExtensions`, and cache bundled runtime-dependency manifest reads during scans. Thanks @codex. - WhatsApp/Web: keep quiet but healthy linked-device sessions connected by basing the watchdog on WhatsApp Web transport activity, while retaining a longer app-silence cap so frame activity cannot mask a stuck session forever. Fixes #70678; carries forward the focused #71466 approach and keeps #63939 as related configurable-timeout follow-up. Thanks @vincentkoc and @oromeis. diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md index aa2e6609c67..2964d318c34 100644 --- a/docs/web/control-ui.md +++ b/docs/web/control-ui.md @@ -134,6 +134,7 @@ The Control UI can localize itself on first load based on your browser locale. T - `chat.send` is **non-blocking**: it acks immediately with `{ runId, status: "started" }` and the response streams via `chat` events. + - Chat uploads accept images plus non-video files. Images keep the native image path; other files are stored as managed media and shown in history as attachment links. - Re-sending with the same `idempotencyKey` returns `{ status: "in_flight" }` while running, and `{ status: "ok" }` after completion. - `chat.history` responses are size-bounded for UI safety. When transcript entries are too large, Gateway may truncate long text fields, omit heavy metadata blocks, and replace oversized messages with a placeholder (`[chat.history omitted: message too large]`). - Assistant/generated images are persisted as managed media references and served back through authenticated Gateway media URLs, so reloads do not depend on raw base64 image payloads staying in the chat history response. diff --git a/src/gateway/chat-attachments.test.ts b/src/gateway/chat-attachments.test.ts index 2af2fa040d9..c7920754ab8 100644 --- a/src/gateway/chat-attachments.test.ts +++ b/src/gateway/chat-attachments.test.ts @@ -111,14 +111,23 @@ describe("parseMessageWithAttachments", () => { expect(logs[0]).toMatch(/mime mismatch/i); }); - it("drops unknown mime when sniff fails and logs", async () => { + it("persists unknown non-image files when sniff fails", async () => { const unknown = Buffer.from("not an image").toString("base64"); const { parsed, logs } = await parseWithWarnings("x", [ { type: "file", fileName: "unknown.bin", content: unknown }, ]); - expect(parsed.images).toHaveLength(0); - expect(logs).toHaveLength(1); - expect(logs[0]).toMatch(/unable to detect image mime type/i); + try { + expect(parsed.images).toHaveLength(0); + expect(parsed.offloadedRefs).toHaveLength(1); + expect(parsed.offloadedRefs[0]).toMatchObject({ + label: "unknown.bin", + mimeType: "application/octet-stream", + }); + expect(parsed.message).toMatch(/^x\n\[media attached: media:\/\/inbound\//); + expect(logs).toHaveLength(0); + } finally { + await cleanupOffloadedRefs(parsed.offloadedRefs); + } }); it("keeps valid images and drops invalid ones", async () => { @@ -143,6 +152,49 @@ describe("parseMessageWithAttachments", () => { expect(logs.some((l) => /non-image/i.test(l))).toBe(true); }); + it("persists non-image file attachments as media refs", async () => { + const parsed = await parseMessageWithAttachments( + "read this", + [ + { + type: "file", + mimeType: "application/pdf", + fileName: "brief.pdf", + content: Buffer.from("%PDF-1.4\n").toString("base64"), + }, + ], + { log: { warn: () => {} } }, + ); + + try { + expect(parsed.images).toHaveLength(0); + expect(parsed.imageOrder).toEqual(["offloaded"]); + expect(parsed.offloadedRefs).toHaveLength(1); + expect(parsed.offloadedRefs[0]).toMatchObject({ + mimeType: "application/pdf", + label: "brief.pdf", + }); + expect(parsed.message).toMatch(/^read this\n\[media attached: media:\/\/inbound\//); + } finally { + await cleanupOffloadedRefs(parsed.offloadedRefs); + } + }); + + it("keeps image sniff fallback for generic image attachments", async () => { + const { parsed, logs } = await parseWithWarnings("see this", [ + { + type: "file", + mimeType: "application/octet-stream", + fileName: "dot", + content: PNG_1x1, + }, + ]); + expect(parsed.images).toHaveLength(1); + expect(parsed.images[0]?.mimeType).toBe("image/png"); + expect(parsed.offloadedRefs).toHaveLength(0); + expect(logs).toHaveLength(0); + }); + it("offloads images for text-only models instead of dropping them", async () => { const logs: string[] = []; const infos: string[] = []; diff --git a/src/gateway/chat-attachments.ts b/src/gateway/chat-attachments.ts index 79b86f09851..6a90d1e1798 100644 --- a/src/gateway/chat-attachments.ts +++ b/src/gateway/chat-attachments.ts @@ -142,6 +142,19 @@ function isImageMime(mime?: string): boolean { return typeof mime === "string" && mime.startsWith("image/"); } +function isVideoMime(mime?: string): boolean { + return typeof mime === "string" && mime.startsWith("video/"); +} + +function isGenericMime(mime?: string): boolean { + return ( + !mime || + mime === "application/octet-stream" || + mime === "binary/octet-stream" || + mime === "application/unknown" + ); +} + function isValidBase64(value: string): boolean { if (value.length === 0 || value.length % 4 !== 0) { return false; @@ -307,6 +320,7 @@ export async function parseMessageWithAttachments( const offloadedRefs: OffloadedRef[] = []; let updatedMessage = message; const shouldForceOffload = opts?.supportsImages === false; + let textOnlyImageOffloadCount = 0; // Track IDs of files saved during this request for cleanup if a later // attachment fails validation and the entire parse is aborted. @@ -344,15 +358,54 @@ export async function parseMessageWithAttachments( const providedMime = normalizeMime(mime); const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64)); - if (sniffedMime && !isImageMime(sniffedMime)) { + if (sniffedMime && !isImageMime(sniffedMime) && isImageMime(providedMime)) { log?.warn(`attachment ${label}: detected non-image (${sniffedMime}), dropping`); continue; } - if (!sniffedMime && !isImageMime(providedMime)) { - log?.warn(`attachment ${label}: unable to detect image mime type, dropping`); + + const shouldHandleAsImage = + isImageMime(sniffedMime) || (isImageMime(providedMime) && !sniffedMime); + if (!shouldHandleAsImage) { + const finalMime = sniffedMime ?? providedMime ?? "application/octet-stream"; + if (isVideoMime(finalMime)) { + log?.warn(`attachment ${label}: video attachments are not supported, dropping`); + continue; + } + + const buffer = Buffer.from(b64, "base64"); + verifyDecodedSize(buffer, sizeBytes, label); + + try { + const rawResult = await saveMediaBuffer(buffer, finalMime, "inbound", maxBytes, label); + const savedMedia = assertSavedMedia(rawResult, label); + savedMediaIds.push(savedMedia.id); + + const mediaRef = `media://inbound/${savedMedia.id}`; + updatedMessage += `\n[media attached: ${mediaRef}]`; + log?.info?.(`[Gateway] Saved file attachment. Saved: ${mediaRef}`); + offloadedRefs.push({ + mediaRef, + id: savedMedia.id, + path: savedMedia.path ?? "", + mimeType: finalMime, + label, + }); + imageOrder.push("offloaded"); + } catch (err) { + const errorMessage = formatErrorMessage(err); + throw new MediaOffloadError( + `[Gateway Error] Failed to save intercepted media to disk: ${errorMessage}`, + { cause: err }, + ); + } continue; } - if (sniffedMime && providedMime && sniffedMime !== providedMime) { + if ( + sniffedMime && + providedMime && + !isGenericMime(providedMime) && + sniffedMime !== providedMime + ) { log?.warn( `attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`, ); @@ -364,7 +417,7 @@ export async function parseMessageWithAttachments( let isOffloaded = false; - if (shouldForceOffload && offloadedRefs.length >= TEXT_ONLY_OFFLOAD_LIMIT) { + if (shouldForceOffload && textOnlyImageOffloadCount >= TEXT_ONLY_OFFLOAD_LIMIT) { log?.warn( `attachment ${label}: dropping image because text-only offload limit ` + `${TEXT_ONLY_OFFLOAD_LIMIT} was reached`, @@ -437,6 +490,9 @@ export async function parseMessageWithAttachments( label, }); imageOrder.push("offloaded"); + if (shouldForceOffload) { + textOnlyImageOffloadCount++; + } isOffloaded = true; } catch (err) { diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index ed78955c645..e7dacc03d0a 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -1789,6 +1789,71 @@ describe("chat directive tag stripping for non-streaming final payloads", () => }); }); + it("persists non-image chat.send attachments as media refs without dispatch images", async () => { + createTranscriptFixture("openclaw-chat-send-user-transcript-file-"); + mockState.finalText = "ok"; + mockState.triggerAgentRunStart = true; + mockState.savedMediaResults = [ + { path: "/tmp/chat-send-brief.pdf", contentType: "application/pdf" }, + ]; + const respond = vi.fn(); + const context = createChatContext(); + + await runNonStreamingChatSend({ + context, + respond, + idempotencyKey: "idem-user-transcript-file", + message: "summarize this", + requestParams: { + attachments: [ + { + type: "file", + mimeType: "application/pdf", + fileName: "brief.pdf", + content: Buffer.from("%PDF-1.4\n").toString("base64"), + }, + ], + }, + expectBroadcast: false, + waitForCompletion: false, + }); + + await waitForAssertion(() => { + const userUpdate = mockState.emittedTranscriptUpdates.find( + (update) => + typeof update.message === "object" && + update.message !== null && + (update.message as { role?: unknown }).role === "user", + ); + const message = userUpdate?.message as + | { + content?: unknown; + MediaPath?: string; + MediaPaths?: string[]; + MediaType?: string; + MediaTypes?: string[]; + } + | undefined; + expect(mockState.lastDispatchImages).toBeUndefined(); + expect(mockState.lastDispatchImageOrder).toEqual(["offloaded"]); + expect(mockState.lastDispatchCtx?.Body).toMatch( + /^summarize this\n\[media attached: media:\/\/inbound\//, + ); + expect(mockState.savedMediaCalls).toEqual([ + expect.objectContaining({ + contentType: "application/pdf", + subdir: "inbound", + size: expect.any(Number), + }), + ]); + expect(message?.content).toMatch(/^summarize this\n\[media attached: media:\/\/inbound\//); + expect(message?.MediaPath).toBe("/tmp/chat-send-brief.pdf"); + expect(message?.MediaPaths).toEqual(["/tmp/chat-send-brief.pdf"]); + expect(message?.MediaType).toBe("application/pdf"); + expect(message?.MediaTypes).toEqual(["application/pdf"]); + }); + }); + it("preserves offloaded attachment media paths in transcript order", async () => { createTranscriptFixture("openclaw-chat-send-user-transcript-offloaded-"); mockState.finalText = "ok"; diff --git a/src/media/store.test.ts b/src/media/store.test.ts index 9c70f208c9e..9334a74c500 100644 --- a/src/media/store.test.ts +++ b/src/media/store.test.ts @@ -157,6 +157,7 @@ describe("media store", () => { async function expectSavedBufferCase(params: { buffer: Buffer; contentType?: string; + originalFilename?: string; expectedContentType: string; expectedExtension: string; assertSaved?: ( @@ -165,7 +166,13 @@ describe("media store", () => { ) => Promise | void; }) { await withTempStore(async (store) => { - const saved = await store.saveMediaBuffer(params.buffer, params.contentType); + const saved = await store.saveMediaBuffer( + params.buffer, + params.contentType, + "inbound", + 5 * 1024 * 1024, + params.originalFilename, + ); expect(saved.contentType).toBe(params.expectedContentType); expect(saved.path.endsWith(params.expectedExtension)).toBe(true); await params.assertSaved?.(saved, params.buffer); @@ -371,6 +378,14 @@ describe("media store", () => { expectedContentType: "image/jpeg", expectedExtension: ".jpg", }, + { + name: "preserves original extension for generic file buffers", + buffer: Buffer.from("custom binary"), + contentType: "application/octet-stream", + originalFilename: "report.custom", + expectedContentType: "application/octet-stream", + expectedExtension: ".custom", + }, ] as const)("$name", async (testCase) => { const buffer = "bufferFactory" in testCase && testCase.bufferFactory @@ -379,8 +394,16 @@ describe("media store", () => { await expectSavedBufferCase({ buffer, contentType: testCase.contentType, + ...("originalFilename" in testCase ? { originalFilename: testCase.originalFilename } : {}), expectedContentType: testCase.expectedContentType, expectedExtension: testCase.expectedExtension, + ...("originalFilename" in testCase + ? { + assertSaved: async (saved: Awaited>) => { + expect(path.basename(saved.path)).toMatch(/^report---.+\.custom$/); + }, + } + : {}), ...("assertSaved" in testCase ? { assertSaved: testCase.assertSaved } : {}), }); }); diff --git a/src/media/store.ts b/src/media/store.ts index f643e51f0b8..4c6a66a4b4a 100644 --- a/src/media/store.ts +++ b/src/media/store.ts @@ -284,6 +284,14 @@ function buildSavedMediaId(params: { : `${params.baseId}${params.ext}`; } +function safeOriginalFilenameExtension(originalFilename?: string): string | undefined { + if (!originalFilename) { + return undefined; + } + const ext = path.extname(originalFilename).toLowerCase(); + return /^\.[a-z0-9]{1,16}$/.test(ext) ? ext : undefined; +} + function buildSavedMediaResult(params: { dir: string; id: string; @@ -419,7 +427,8 @@ export async function saveMediaBuffer( const uuid = crypto.randomUUID(); const headerExt = extensionForMime(normalizeOptionalString(contentType?.split(";")[0])); const mime = await detectMime({ buffer, headerMime: contentType }); - const ext = headerExt ?? extensionForMime(mime) ?? ""; + const ext = + headerExt ?? extensionForMime(mime) ?? safeOriginalFilenameExtension(originalFilename) ?? ""; const id = buildSavedMediaId({ baseId: uuid, ext, originalFilename }); await writeSavedMediaBuffer({ dir, id, buffer }); return buildSavedMediaResult({ dir, id, size: buffer.byteLength, contentType: mime }); diff --git a/ui/src/styles/chat/layout.css b/ui/src/styles/chat/layout.css index aa272cc7579..0ab920ec8fc 100644 --- a/ui/src/styles/chat/layout.css +++ b/ui/src/styles/chat/layout.css @@ -947,6 +947,10 @@ border: 1px solid var(--border); } +.chat-attachment-thumb--file { + width: 180px; +} + .chat-attachment-thumb img { width: 100%; height: 100%; @@ -974,13 +978,32 @@ .chat-attachment-file { display: flex; align-items: center; - gap: 4px; - padding: 4px; + gap: 8px; + width: 100%; + height: 100%; + padding: 8px 34px 8px 10px; + overflow: hidden; + font-size: 0.72rem; + color: var(--text); + background: var(--panel); +} + +.chat-attachment-file__icon { + display: inline-flex; + flex: 0 0 auto; + color: var(--muted); +} + +.chat-attachment-file__icon svg { + width: 16px; + height: 16px; +} + +.chat-attachment-file__name { + min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; - font-size: 0.72rem; - color: var(--muted); } .agent-chat__file-input { diff --git a/ui/src/ui/chat/attachment-support.ts b/ui/src/ui/chat/attachment-support.ts index 70deb1b4743..21b9ba951da 100644 --- a/ui/src/ui/chat/attachment-support.ts +++ b/ui/src/ui/chat/attachment-support.ts @@ -1,5 +1,14 @@ -export const CHAT_ATTACHMENT_ACCEPT = "image/*"; +export const CHAT_ATTACHMENT_ACCEPT = + "image/*,audio/*,application/pdf,text/*,.csv,.json,.md,.txt,.zip," + + ".doc,.docx,.xls,.xlsx,.ppt,.pptx"; export function isSupportedChatAttachmentMimeType(mimeType: string | null | undefined): boolean { - return typeof mimeType === "string" && mimeType.startsWith("image/"); + return typeof mimeType === "string" && !mimeType.startsWith("video/"); +} + +export function isSupportedChatAttachmentFile(file: Pick): boolean { + if (file.type.startsWith("video/")) { + return false; + } + return !/\.(?:avi|m4v|mov|mp4|mpeg|mpg|webm)$/i.test(file.name); } diff --git a/ui/src/ui/chat/grouped-render.test.ts b/ui/src/ui/chat/grouped-render.test.ts index 28c962f6560..67a116707ba 100644 --- a/ui/src/ui/chat/grouped-render.test.ts +++ b/ui/src/ui/chat/grouped-render.test.ts @@ -722,11 +722,16 @@ describe("grouped chat rendering", () => { id: "user-history-document", role: "user", content: "", - MediaPath: "/tmp/openclaw/user-upload.pdf", + MediaPath: "/__openclaw__/media/user-upload.pdf", MediaType: "application/pdf", timestamp: Date.now(), }); expect(container.querySelector(".chat-message-image")).toBeNull(); + const documentLink = container.querySelector( + ".chat-assistant-attachment-card__link", + ); + expect(documentLink?.textContent).toContain("user-upload.pdf"); + expect(documentLink?.getAttribute("href")).toBe("/__openclaw__/media/user-upload.pdf"); }); it("fetches managed chat images with auth and renders blob previews", async () => { diff --git a/ui/src/ui/chat/grouped-render.ts b/ui/src/ui/chat/grouped-render.ts index 4e9e59309eb..0630558b68b 100644 --- a/ui/src/ui/chat/grouped-render.ts +++ b/ui/src/ui/chat/grouped-render.ts @@ -118,6 +118,8 @@ type RenderableImageBlock = ImageBlock & { displayUrl: string; }; +type AttachmentItem = Extract; + const managedImageBlobUrlCache = new Map>(); const managedImageBlobUrlResolvedCache = new Map(); const managedImageBlobUrlMissCache = new Map(); @@ -169,6 +171,56 @@ function isImageTranscriptMediaPath(path: string, mediaType: unknown): boolean { ); } +function isAudioTranscriptMediaPath(path: string, mediaType: unknown): boolean { + if (typeof mediaType === "string" && mediaType.trim().toLowerCase().startsWith("audio/")) { + return true; + } + const ext = getFileExtension(path); + return ( + ext !== undefined && ["aac", "flac", "m4a", "mp3", "oga", "ogg", "opus", "wav"].includes(ext) + ); +} + +function isVideoTranscriptMediaPath(path: string, mediaType: unknown): boolean { + if (typeof mediaType === "string" && mediaType.trim().toLowerCase().startsWith("video/")) { + return true; + } + const ext = getFileExtension(path); + return ext !== undefined && ["m4v", "mov", "mp4", "webm"].includes(ext); +} + +function labelForMediaPath(mediaPath: string): string { + const trimmed = mediaPath.trim(); + try { + if (/^https?:\/\//i.test(trimmed)) { + const parsed = new URL(trimmed); + return parsed.pathname.split("/").pop()?.trim() || parsed.hostname || trimmed; + } + } catch {} + return trimmed.split(/[\\/]/).pop()?.trim() || trimmed; +} + +function extractTranscriptMediaEntries(message: unknown): Array<{ + path: string; + mediaType: unknown; +}> { + const m = message as Record; + const transcriptMediaPaths = Array.isArray(m.MediaPaths) + ? m.MediaPaths.filter((value): value is string => typeof value === "string") + : typeof m.MediaPath === "string" + ? [m.MediaPath] + : []; + const transcriptMediaTypes = Array.isArray(m.MediaTypes) + ? m.MediaTypes + : typeof m.MediaType === "string" + ? [m.MediaType] + : []; + return transcriptMediaPaths.map((mediaPath, index) => ({ + path: mediaPath, + mediaType: transcriptMediaTypes[index], + })); +} + function extractImages(message: unknown): ImageBlock[] { const m = message as Record; const content = m.content; @@ -232,18 +284,8 @@ function extractImages(message: unknown): ImageBlock[] { } } - const transcriptMediaPaths = Array.isArray(m.MediaPaths) - ? m.MediaPaths.filter((value): value is string => typeof value === "string") - : typeof m.MediaPath === "string" - ? [m.MediaPath] - : []; - const transcriptMediaTypes = Array.isArray(m.MediaTypes) - ? m.MediaTypes - : typeof m.MediaType === "string" - ? [m.MediaType] - : []; - for (const [index, mediaPath] of transcriptMediaPaths.entries()) { - if (!isImageTranscriptMediaPath(mediaPath, transcriptMediaTypes[index])) { + for (const { path: mediaPath, mediaType } of extractTranscriptMediaEntries(message)) { + if (!isImageTranscriptMediaPath(mediaPath, mediaType)) { continue; } appendImageBlock(images, { url: mediaPath }); @@ -252,6 +294,30 @@ function extractImages(message: unknown): ImageBlock[] { return images; } +function extractTranscriptAttachments(message: unknown): AttachmentItem[] { + const attachments: AttachmentItem[] = []; + for (const { path: mediaPath, mediaType } of extractTranscriptMediaEntries(message)) { + if (isImageTranscriptMediaPath(mediaPath, mediaType)) { + continue; + } + const kind = isAudioTranscriptMediaPath(mediaPath, mediaType) + ? "audio" + : isVideoTranscriptMediaPath(mediaPath, mediaType) + ? "video" + : "document"; + attachments.push({ + type: "attachment", + attachment: { + url: mediaPath, + kind, + label: labelForMediaPath(mediaPath), + ...(typeof mediaType === "string" ? { mimeType: mediaType } : {}), + }, + }); + } + return attachments; +} + export function renderReadingIndicatorGroup( assistant?: AssistantIdentity, basePath?: string, @@ -1042,7 +1108,7 @@ function renderAssistantAttachmentStatusCard(params: { } function renderAssistantAttachments( - attachments: Array>, + attachments: AttachmentItem[], localMediaPreviewRoots: readonly string[], basePath?: string, authToken?: string | null, @@ -1296,9 +1362,9 @@ function renderGroupedMessage( .join("\n") .trim(); const assistantAttachments = normalizedMessage.content.filter( - (item): item is Extract => - item.type === "attachment", + (item): item is AttachmentItem => item.type === "attachment", ); + const visibleAttachments = [...assistantAttachments, ...extractTranscriptAttachments(message)]; const assistantViewBlocks = normalizedMessage.content.filter( (item): item is Extract => item.type === "canvas", ); @@ -1329,7 +1395,7 @@ function renderGroupedMessage( !markdown && !visibleToolCards && !hasImages && - assistantAttachments.length === 0 && + visibleAttachments.length === 0 && assistantViewBlocks.length === 0 && !normalizedMessage.replyTarget ) { @@ -1390,7 +1456,7 @@ function renderGroupedMessage(
${renderMessageImages(images, imageRenderOptions)} ${renderAssistantAttachments( - assistantAttachments, + visibleAttachments, opts.localMediaPreviewRoots ?? [], opts.basePath, opts.assistantAttachmentAuthToken, @@ -1446,7 +1512,7 @@ function renderGroupedMessage( : html` ${renderMessageImages(images, imageRenderOptions)} ${renderAssistantAttachments( - assistantAttachments, + visibleAttachments, opts.localMediaPreviewRoots ?? [], opts.basePath, opts.assistantAttachmentAuthToken, diff --git a/ui/src/ui/controllers/chat.test.ts b/ui/src/ui/controllers/chat.test.ts index beb2fe1d745..3c0017cb4fd 100644 --- a/ui/src/ui/controllers/chat.test.ts +++ b/ui/src/ui/controllers/chat.test.ts @@ -624,6 +624,53 @@ describe("loadChatHistory", () => { }); describe("sendChatMessage", () => { + it("serializes non-image chat attachments as files", async () => { + const request = vi.fn().mockResolvedValue({ runId: "run-1", status: "started" }); + const state = createState({ + connected: true, + client: { request } as unknown as ChatState["client"], + }); + + const result = await sendChatMessage(state, "summarize", [ + { + id: "att-1", + dataUrl: `data:application/pdf;base64,${Buffer.from("%PDF-1.4\n").toString("base64")}`, + mimeType: "application/pdf", + fileName: "brief.pdf", + }, + ]); + + expect(result).toEqual(expect.any(String)); + expect(request).toHaveBeenCalledWith( + "chat.send", + expect.objectContaining({ + message: "summarize", + attachments: [ + { + type: "file", + mimeType: "application/pdf", + fileName: "brief.pdf", + content: Buffer.from("%PDF-1.4\n").toString("base64"), + }, + ], + }), + ); + expect(state.chatMessages[0]).toMatchObject({ + role: "user", + content: [ + { type: "text", text: "summarize" }, + { + type: "attachment", + attachment: { + kind: "document", + label: "brief.pdf", + mimeType: "application/pdf", + }, + }, + ], + }); + }); + it("formats structured non-auth connect failures for chat send", async () => { const request = vi.fn().mockRejectedValue( new GatewayRequestError({ diff --git a/ui/src/ui/controllers/chat.ts b/ui/src/ui/controllers/chat.ts index 5c29ae51944..862183d3abf 100644 --- a/ui/src/ui/controllers/chat.ts +++ b/ui/src/ui/controllers/chat.ts @@ -456,8 +456,9 @@ function buildApiAttachments(attachments?: ChatAttachment[]) { return null; } return { - type: "image", + type: parsed.mimeType.startsWith("image/") ? "image" : "file", mimeType: parsed.mimeType, + fileName: att.fileName, content: parsed.content, }; }) @@ -544,16 +545,38 @@ export async function sendChatMessage( const now = Date.now(); // Build user message content blocks - const contentBlocks: Array<{ type: string; text?: string; source?: unknown }> = []; + const contentBlocks: Array<{ + type: string; + text?: string; + source?: unknown; + attachment?: { + url: string; + kind: "audio" | "document"; + label: string; + mimeType?: string; + }; + }> = []; if (msg) { contentBlocks.push({ type: "text", text: msg }); } // Add image previews to the message for display if (hasAttachments) { for (const att of attachments) { + if (att.mimeType.startsWith("image/")) { + contentBlocks.push({ + type: "image", + source: { type: "base64", media_type: att.mimeType, data: att.dataUrl }, + }); + continue; + } contentBlocks.push({ - type: "image", - source: { type: "base64", media_type: att.mimeType, data: att.dataUrl }, + type: "attachment", + attachment: { + url: att.dataUrl, + kind: att.mimeType.startsWith("audio/") ? "audio" : "document", + label: att.fileName?.trim() || "Attached file", + mimeType: att.mimeType, + }, }); } } diff --git a/ui/src/ui/ui-types.ts b/ui/src/ui/ui-types.ts index 9fe1ce33fb9..c13f0123b0e 100644 --- a/ui/src/ui/ui-types.ts +++ b/ui/src/ui/ui-types.ts @@ -2,6 +2,7 @@ export type ChatAttachment = { id: string; dataUrl: string; mimeType: string; + fileName?: string; }; export type ChatQueueItem = { diff --git a/ui/src/ui/views/chat.test.ts b/ui/src/ui/views/chat.test.ts index 8bcc1564813..3a62433f1cd 100644 --- a/ui/src/ui/views/chat.test.ts +++ b/ui/src/ui/views/chat.test.ts @@ -432,6 +432,53 @@ describe("chat loading skeleton", () => { }); }); +describe("chat attachment picker", () => { + it("accepts and previews non-video file attachments", async () => { + const onAttachmentsChange = vi.fn(); + const container = renderChatView({ onAttachmentsChange }); + const input = container.querySelector(".agent-chat__file-input"); + const file = new File(["%PDF-1.4\n"], "brief.pdf", { type: "application/pdf" }); + + expect(input).not.toBeNull(); + Object.defineProperty(input!, "files", { + configurable: true, + value: [file], + }); + input?.dispatchEvent(new Event("change", { bubbles: true })); + + await vi.waitFor(() => { + expect(onAttachmentsChange).toHaveBeenCalledWith([ + expect.objectContaining({ + dataUrl: expect.stringMatching(/^data:application\/pdf;base64,/), + fileName: "brief.pdf", + mimeType: "application/pdf", + }), + ]); + }); + + const nextAttachments = onAttachmentsChange.mock.calls[0]?.[0] ?? []; + const preview = renderChatView({ attachments: nextAttachments }); + expect(preview.querySelector(".chat-attachment-thumb--file")).not.toBeNull(); + expect(preview.textContent).toContain("brief.pdf"); + }); + + it("filters video file attachments", () => { + const onAttachmentsChange = vi.fn(); + const container = renderChatView({ onAttachmentsChange }); + const input = container.querySelector(".agent-chat__file-input"); + const file = new File(["video"], "clip.mp4", { type: "video/mp4" }); + + expect(input).not.toBeNull(); + Object.defineProperty(input!, "files", { + configurable: true, + value: [file], + }); + input?.dispatchEvent(new Event("change", { bubbles: true })); + + expect(onAttachmentsChange).not.toHaveBeenCalled(); + }); +}); + describe("chat queue", () => { it("renders Steer only for queued messages during an active run", () => { const onQueueSteer = vi.fn(); diff --git a/ui/src/ui/views/chat.ts b/ui/src/ui/views/chat.ts index 5580edf509c..625fa8df636 100644 --- a/ui/src/ui/views/chat.ts +++ b/ui/src/ui/views/chat.ts @@ -4,7 +4,7 @@ import { repeat } from "lit/directives/repeat.js"; import type { CompactionStatus, FallbackStatus } from "../app-tool-stream.ts"; import { CHAT_ATTACHMENT_ACCEPT, - isSupportedChatAttachmentMimeType, + isSupportedChatAttachmentFile, } from "../chat/attachment-support.ts"; import { buildChatItems } from "../chat/build-chat-items.ts"; import { renderChatQueue } from "../chat/chat-queue.ts"; @@ -205,6 +205,19 @@ function generateAttachmentId(): string { return `att-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; } +function chatAttachmentFromFile(file: File, dataUrl: string): ChatAttachment { + return { + id: generateAttachmentId(), + dataUrl, + mimeType: file.type || "application/octet-stream", + fileName: file.name || undefined, + }; +} + +function isImageAttachment(att: ChatAttachment): boolean { + return att.mimeType.startsWith("image/"); +} + function handlePaste(e: ClipboardEvent, props: ChatProps) { const items = e.clipboardData?.items; if (!items || !props.onAttachmentsChange) { @@ -229,11 +242,7 @@ function handlePaste(e: ClipboardEvent, props: ChatProps) { const reader = new FileReader(); reader.addEventListener("load", () => { const dataUrl = reader.result as string; - const newAttachment: ChatAttachment = { - id: generateAttachmentId(), - dataUrl, - mimeType: file.type, - }; + const newAttachment = chatAttachmentFromFile(file, dataUrl); const current = props.attachments ?? []; props.onAttachmentsChange?.([...current, newAttachment]); }); @@ -250,17 +259,13 @@ function handleFileSelect(e: Event, props: ChatProps) { const additions: ChatAttachment[] = []; let pending = 0; for (const file of input.files) { - if (!isSupportedChatAttachmentMimeType(file.type)) { + if (!isSupportedChatAttachmentFile(file)) { continue; } pending++; const reader = new FileReader(); reader.addEventListener("load", () => { - additions.push({ - id: generateAttachmentId(), - dataUrl: reader.result as string, - mimeType: file.type, - }); + additions.push(chatAttachmentFromFile(file, reader.result as string)); pending--; if (pending === 0) { props.onAttachmentsChange?.([...current, ...additions]); @@ -281,17 +286,13 @@ function handleDrop(e: DragEvent, props: ChatProps) { const additions: ChatAttachment[] = []; let pending = 0; for (const file of files) { - if (!isSupportedChatAttachmentMimeType(file.type)) { + if (!isSupportedChatAttachmentFile(file)) { continue; } pending++; const reader = new FileReader(); reader.addEventListener("load", () => { - additions.push({ - id: generateAttachmentId(), - dataUrl: reader.result as string, - mimeType: file.type, - }); + additions.push(chatAttachmentFromFile(file, reader.result as string)); pending--; if (pending === 0) { props.onAttachmentsChange?.([...current, ...additions]); @@ -310,8 +311,24 @@ function renderAttachmentPreview(props: ChatProps): TemplateResult | typeof noth
${attachments.map( (att) => html` -
- Attachment preview +
+ ${isImageAttachment(att) + ? html`Attachment preview` + : html` +
+ ${icons.paperclip} + ${att.fileName ?? "Attached file"} +
+ `}