diff --git a/CHANGELOG.md b/CHANGELOG.md index 89b5188c74b..4f3075a27a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -97,6 +97,7 @@ Docs: https://docs.openclaw.ai - Plugins/PDF: move local PDF extraction into a bundled `document-extract` plugin so core no longer owns `pdfjs-dist` or PDF image-rendering dependencies. Thanks @vincentkoc. - Dependencies/memory: stop installing `node-llama-cpp` by default; local embeddings now load it only when operators install the optional runtime package. Thanks @vincentkoc. - Dependencies/media: replace the tiny core media host's Express server with `node:http`, so Express is no longer a root runtime dependency. Thanks @vincentkoc. +- Dependencies/media: move Sharp-backed image attachment processing into the bundled media understanding runtime so core no longer owns the native image parser dependency. Thanks @vincentkoc. - Matrix: require full cross-signing identity trust for self-device verification and add `openclaw matrix verify self` so operators can establish that trust from the CLI. (#70401) Thanks @gumadeiras. - WebChat/sessions: keep runtime-only prompt context out of visible transcript history and scrub legacy wrappers from session history surfaces. Thanks @91wan. - Gradium: add a bundled text-to-speech provider with voice-note and telephony output support. (#64958) Thanks @LaurentMazare. diff --git a/extensions/media-understanding-core/image-ops.ts b/extensions/media-understanding-core/image-ops.ts new file mode 100644 index 00000000000..546e2e8149d --- /dev/null +++ b/extensions/media-understanding-core/image-ops.ts @@ -0,0 +1,137 @@ +import type { ImageMetadata } from "openclaw/plugin-sdk/media-runtime"; +import type sharpImport from "sharp"; + +type SharpFactory = typeof sharpImport; + +type ResizeToJpegParams = { + buffer: Buffer; + maxSide: number; + quality: number; + withoutEnlargement?: boolean; +}; + +type ResizeToPngParams = { + buffer: Buffer; + maxSide: number; + compressionLevel?: number; + withoutEnlargement?: boolean; +}; + +type MediaUnderstandingImageOpsOptions = { + maxInputPixels: number; +}; + +const SHARP_MODULE = "sharp"; + +let sharpFactoryPromise: Promise | null = null; + +function normalizeSharpFactory(mod: unknown): SharpFactory { + const candidates = [ + (mod as { default?: unknown }).default, + ((mod as { default?: { default?: unknown } }).default ?? {})?.default, + mod, + ]; + const sharp = candidates.find( + (candidate): candidate is SharpFactory => typeof candidate === "function", + ); + if (!sharp) { + throw new Error("Optional dependency sharp did not expose an image processor"); + } + return sharp; +} + +async function loadSharp(maxInputPixels: number): Promise { + if (!sharpFactoryPromise) { + sharpFactoryPromise = import(SHARP_MODULE) + .then((mod) => { + const sharp = normalizeSharpFactory(mod); + return ((buffer, options) => + sharp(buffer, { + ...options, + failOnError: false, + limitInputPixels: maxInputPixels, + })) as SharpFactory; + }) + .catch((err) => { + sharpFactoryPromise = null; + throw new Error("Optional dependency sharp is required for image attachment processing", { + cause: err, + }); + }); + } + return await sharpFactoryPromise; +} + +function normalizeMaxInputPixels(value: number): number { + if (!Number.isSafeInteger(value) || value <= 0) { + throw new Error("Media attachment image ops require a positive maxInputPixels budget"); + } + return value; +} + +function normalizeMetadata(meta: { width?: number; height?: number }): ImageMetadata | null { + const width = meta.width ?? 0; + const height = meta.height ?? 0; + if (!Number.isFinite(width) || !Number.isFinite(height)) { + return null; + } + if (width <= 0 || height <= 0) { + return null; + } + return { width, height }; +} + +export function createMediaAttachmentImageOps(options: MediaUnderstandingImageOpsOptions) { + const maxInputPixels = normalizeMaxInputPixels(options.maxInputPixels); + return { + async getImageMetadata(buffer: Buffer): Promise { + const sharp = await loadSharp(maxInputPixels); + return normalizeMetadata(await sharp(buffer).metadata()); + }, + + async normalizeExifOrientation(buffer: Buffer): Promise { + const sharp = await loadSharp(maxInputPixels); + return await sharp(buffer).rotate().toBuffer(); + }, + + async resizeToJpeg(params: ResizeToJpegParams): Promise { + const sharp = await loadSharp(maxInputPixels); + return await sharp(params.buffer) + .rotate() + .resize({ + width: params.maxSide, + height: params.maxSide, + fit: "inside", + withoutEnlargement: params.withoutEnlargement !== false, + }) + .jpeg({ quality: params.quality, mozjpeg: true }) + .toBuffer(); + }, + + async convertHeicToJpeg(buffer: Buffer): Promise { + const sharp = await loadSharp(maxInputPixels); + return await sharp(buffer).jpeg({ quality: 90, mozjpeg: true }).toBuffer(); + }, + + async hasAlphaChannel(buffer: Buffer): Promise { + const sharp = await loadSharp(maxInputPixels); + const meta = await sharp(buffer).metadata(); + return meta.hasAlpha || meta.channels === 4; + }, + + async resizeToPng(params: ResizeToPngParams): Promise { + const sharp = await loadSharp(maxInputPixels); + const compressionLevel = params.compressionLevel ?? 6; + return await sharp(params.buffer) + .rotate() + .resize({ + width: params.maxSide, + height: params.maxSide, + fit: "inside", + withoutEnlargement: params.withoutEnlargement !== false, + }) + .png({ compressionLevel }) + .toBuffer(); + }, + }; +} diff --git a/extensions/media-understanding-core/package.json b/extensions/media-understanding-core/package.json index 9101d4930a1..4a6c19f8103 100644 --- a/extensions/media-understanding-core/package.json +++ b/extensions/media-understanding-core/package.json @@ -4,7 +4,15 @@ "private": true, "description": "OpenClaw media understanding runtime package", "type": "module", + "dependencies": { + "sharp": "^0.34.5" + }, "devDependencies": { "@openclaw/plugin-sdk": "workspace:*" + }, + "openclaw": { + "bundle": { + "stageRuntimeDependencies": true + } } } diff --git a/package.json b/package.json index 19115b08d3f..384df656fbf 100644 --- a/package.json +++ b/package.json @@ -1636,7 +1636,6 @@ "osc-progress": "^0.3.0", "proxy-agent": "^8.0.1", "semver": "7.7.4", - "sharp": "^0.34.5", "sqlite-vec": "0.1.9", "tar": "7.5.13", "tslog": "^4.10.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 074e5e0c7b5..1ad3d60568a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -117,9 +117,6 @@ importers: semver: specifier: 7.7.4 version: 7.7.4 - sharp: - specifier: ^0.34.5 - version: 0.34.5 sqlite-vec: specifier: 0.1.9 version: 0.1.9 @@ -799,6 +796,10 @@ importers: version: link:../.. extensions/media-understanding-core: + dependencies: + sharp: + specifier: ^0.34.5 + version: 0.34.5 devDependencies: '@openclaw/plugin-sdk': specifier: workspace:* diff --git a/scripts/lib/dependency-ownership.json b/scripts/lib/dependency-ownership.json index fa64d161ed8..63f45a99167 100644 --- a/scripts/lib/dependency-ownership.json +++ b/scripts/lib/dependency-ownership.json @@ -164,8 +164,9 @@ "risk": ["version-parser"] }, "sharp": { - "owner": "capability:image-ops", - "class": "default-runtime-initially", + "owner": "plugin:media-understanding-core", + "class": "plugin-runtime", + "activation": ["media-understanding-core.image-ops"], "risk": ["native", "parser", "untrusted-files"] }, "sqlite-vec": { @@ -193,6 +194,11 @@ "class": "core-runtime", "risk": ["network"] }, + "web-push": { + "owner": "core:web-push", + "class": "core-runtime", + "risk": ["network", "push-notifications", "crypto"] + }, "ws": { "owner": "core:gateway-websocket", "class": "core-runtime", diff --git a/src/infra/tsdown-config.test.ts b/src/infra/tsdown-config.test.ts index 3154c399b50..f834b7ecc0e 100644 --- a/src/infra/tsdown-config.test.ts +++ b/src/infra/tsdown-config.test.ts @@ -41,6 +41,11 @@ function entryKeys(config: TsdownConfigEntry): string[] { return Object.keys(config.entry); } +function hasBundledPluginRuntimeEntry(config: TsdownConfigEntry): boolean { + const keys = entryKeys(config); + return keys.includes("index") || keys.includes("runtime-api"); +} + function bundledEntry(pluginId: string): string { return `${bundledPluginRoot(pluginId)}/index`; } @@ -82,12 +87,19 @@ describe("tsdown config", () => { ); expect(stagedGraphs.length).toBeGreaterThan(0); - expect(stagedGraphs.every((config) => entryKeys(config).includes("index"))).toBe(true); + expect(stagedGraphs.every(hasBundledPluginRuntimeEntry)).toBe(true); expect(stagedGraphs.every((config) => !entryKeys(config).includes("plugin-sdk/index"))).toBe( true, ); expect(stagedGraphs.some((config) => config.outDir === "dist/extensions/discord")).toBe(true); expect(stagedGraphs.some((config) => config.outDir === "dist/extensions/msteams")).toBe(true); + expect( + stagedGraphs.some( + (config) => + config.outDir === "dist/extensions/media-understanding-core" && + entryKeys(config).includes("image-ops"), + ), + ).toBe(true); }); it("does not emit plugin-sdk or hooks from a separate dist graph", () => { diff --git a/src/media/image-ops.ts b/src/media/image-ops.ts index cccc25efdeb..630cb56e9fa 100644 --- a/src/media/image-ops.ts +++ b/src/media/image-ops.ts @@ -3,16 +3,38 @@ import path from "node:path"; import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { runExec } from "../process/exec.js"; -type Sharp = typeof import("sharp"); -type SharpFactory = (buffer: Buffer) => ReturnType; - export type ImageMetadata = { width: number; height: number; }; +type MediaAttachmentImageOps = { + getImageMetadata(buffer: Buffer): Promise; + normalizeExifOrientation(buffer: Buffer): Promise; + resizeToJpeg(params: { + buffer: Buffer; + maxSide: number; + quality: number; + withoutEnlargement?: boolean; + }): Promise; + convertHeicToJpeg(buffer: Buffer): Promise; + hasAlphaChannel(buffer: Buffer): Promise; + resizeToPng(params: { + buffer: Buffer; + maxSide: number; + compressionLevel?: number; + withoutEnlargement?: boolean; + }): Promise; +}; + +type MediaAttachmentImageOpsModule = { + createMediaAttachmentImageOps?: (options: { maxInputPixels: number }) => MediaAttachmentImageOps; +}; + export const IMAGE_REDUCE_QUALITY_STEPS = [85, 75, 65, 55, 45, 35] as const; export const MAX_IMAGE_INPUT_PIXELS = 25_000_000; +const MEDIA_UNDERSTANDING_CORE_PLUGIN_ID = "media-understanding-core"; +const MEDIA_UNDERSTANDING_CORE_IMAGE_OPS_ARTIFACT = "image-ops.js"; export function buildImageResizeSideGrid(maxSide: number, sideStart: number): number[] { return [sideStart, 1800, 1600, 1400, 1200, 1000, 800] @@ -32,18 +54,47 @@ function prefersSips(): boolean { ); } -let sharpFactoryPromise: Promise | null = null; +let mediaAttachmentImageOpsPromise: Promise | null = null; -async function loadSharp(): Promise { - sharpFactoryPromise ??= import("sharp").then((mod) => { - const sharp = (mod as unknown as { default?: Sharp }).default ?? (mod as unknown as Sharp); - return (buffer: Buffer) => - sharp(buffer, { - failOnError: false, - limitInputPixels: MAX_IMAGE_INPUT_PIXELS, +function isMediaAttachmentImageOps(value: unknown): value is MediaAttachmentImageOps { + if (!value || typeof value !== "object") { + return false; + } + const candidate = value as Partial>; + return ( + typeof candidate.getImageMetadata === "function" && + typeof candidate.normalizeExifOrientation === "function" && + typeof candidate.resizeToJpeg === "function" && + typeof candidate.convertHeicToJpeg === "function" && + typeof candidate.hasAlphaChannel === "function" && + typeof candidate.resizeToPng === "function" + ); +} + +async function loadMediaAttachmentImageOps(): Promise { + if (!mediaAttachmentImageOpsPromise) { + mediaAttachmentImageOpsPromise = Promise.resolve() + .then(async () => { + const { loadBundledPluginPublicArtifactModuleSync } = + await import("../plugins/public-surface-loader.js"); + const mod = loadBundledPluginPublicArtifactModuleSync({ + dirName: MEDIA_UNDERSTANDING_CORE_PLUGIN_ID, + artifactBasename: MEDIA_UNDERSTANDING_CORE_IMAGE_OPS_ARTIFACT, + }); + const ops = mod.createMediaAttachmentImageOps?.({ + maxInputPixels: MAX_IMAGE_INPUT_PIXELS, + }); + if (!isMediaAttachmentImageOps(ops)) { + throw new Error("Media understanding core did not expose image ops"); + } + return ops; + }) + .catch((err) => { + mediaAttachmentImageOpsPromise = null; + throw err; }); - }); - return sharpFactoryPromise; + } + return await mediaAttachmentImageOpsPromise; } function isPositiveImageDimension(value: number): boolean { @@ -409,17 +460,9 @@ export async function getImageMetadata(buffer: Buffer): Promise } try { - const sharp = await loadSharp(); - // .rotate() with no args auto-rotates based on EXIF orientation - return await sharp(buffer).rotate().toBuffer(); + const ops = await loadMediaAttachmentImageOps(); + return await ops.normalizeExifOrientation(buffer); } catch { - // Sharp not available or failed - return original buffer return buffer; } } @@ -534,18 +575,8 @@ export async function resizeToJpeg(params: { }); } - const sharp = await loadSharp(); - // Use .rotate() BEFORE .resize() to auto-rotate based on EXIF orientation - return await sharp(params.buffer) - .rotate() // Auto-rotate based on EXIF before resizing - .resize({ - width: params.maxSide, - height: params.maxSide, - fit: "inside", - withoutEnlargement: params.withoutEnlargement !== false, - }) - .jpeg({ quality: params.quality, mozjpeg: true }) - .toBuffer(); + const ops = await loadMediaAttachmentImageOps(); + return await ops.resizeToJpeg(params); } export async function convertHeicToJpeg(buffer: Buffer): Promise { @@ -554,8 +585,8 @@ export async function convertHeicToJpeg(buffer: Buffer): Promise { if (prefersSips()) { return await sipsConvertToJpeg(buffer); } - const sharp = await loadSharp(); - return await sharp(buffer).jpeg({ quality: 90, mozjpeg: true }).toBuffer(); + const ops = await loadMediaAttachmentImageOps(); + return await ops.convertHeicToJpeg(buffer); } /** @@ -566,12 +597,8 @@ export async function hasAlphaChannel(buffer: Buffer): Promise { await assertImagePixelLimit(buffer); try { - const sharp = await loadSharp(); - const meta = await sharp(buffer).metadata(); - // Check if the image has an alpha channel - // PNG color types with alpha: 4 (grayscale+alpha), 6 (RGBA) - // Sharp reports this via 'channels' (4 = RGBA) or 'hasAlpha' - return meta.hasAlpha || meta.channels === 4; + const ops = await loadMediaAttachmentImageOps(); + return await ops.hasAlphaChannel(buffer); } catch { return false; } @@ -579,7 +606,7 @@ export async function hasAlphaChannel(buffer: Buffer): Promise { /** * Resizes an image to PNG format, preserving alpha channel (transparency). - * Falls back to sharp only (no sips fallback for PNG with alpha). + * Falls back to the media attachments plugin only (no sips fallback for PNG with alpha). */ export async function resizeToPng(params: { buffer: Buffer; @@ -589,20 +616,8 @@ export async function resizeToPng(params: { }): Promise { await assertImagePixelLimit(params.buffer); - const sharp = await loadSharp(); - // Compression level 6 is a good balance (0=fastest, 9=smallest) - const compressionLevel = params.compressionLevel ?? 6; - - return await sharp(params.buffer) - .rotate() // Auto-rotate based on EXIF if present - .resize({ - width: params.maxSide, - height: params.maxSide, - fit: "inside", - withoutEnlargement: params.withoutEnlargement !== false, - }) - .png({ compressionLevel }) - .toBuffer(); + const ops = await loadMediaAttachmentImageOps(); + return await ops.resizeToPng(params); } export async function optimizeImageToPng( diff --git a/src/plugins/bundled-dir.ts b/src/plugins/bundled-dir.ts index 63239f852e1..ecc025c1f85 100644 --- a/src/plugins/bundled-dir.ts +++ b/src/plugins/bundled-dir.ts @@ -139,11 +139,12 @@ export function resolveBundledPluginsDir(env: NodeJS.ProcessEnv = process.env): const preferSourceCheckout = Boolean(env.VITEST) || runningSourceTypeScriptProcess(); try { - const packageRoots = [ - resolveOpenClawPackageRootSync({ argv1: process.argv[1] }), - resolveOpenClawPackageRootSync({ cwd: process.cwd() }), - resolveOpenClawPackageRootSync({ moduleUrl: import.meta.url }), - ].filter( + const argvRoot = resolveOpenClawPackageRootSync({ argv1: process.argv[1] }); + const cwdRoot = resolveOpenClawPackageRootSync({ cwd: process.cwd() }); + const moduleRoot = resolveOpenClawPackageRootSync({ moduleUrl: import.meta.url }); + const packageRoots = ( + preferSourceCheckout ? [cwdRoot, argvRoot, moduleRoot] : [argvRoot, cwdRoot, moduleRoot] + ).filter( (entry, index, all): entry is string => Boolean(entry) && all.indexOf(entry) === index, ); for (const packageRoot of packageRoots) {