refactor(media): move sharp image ops into media runtime (#71519)

* refactor(media): move sharp image ops into plugin

* fix(media): pass image pixel budget to sharp plugin

* refactor(media): reuse media understanding sharp runtime

* test(build): allow staged runtime core graphs
This commit is contained in:
Vincent Koc
2026-04-25 04:31:10 -07:00
committed by GitHub
parent b2b898c2a8
commit e174d96cc0
9 changed files with 255 additions and 75 deletions

View File

@@ -97,6 +97,7 @@ Docs: https://docs.openclaw.ai
- Plugins/PDF: move local PDF extraction into a bundled `document-extract` plugin so core no longer owns `pdfjs-dist` or PDF image-rendering dependencies. Thanks @vincentkoc.
- Dependencies/memory: stop installing `node-llama-cpp` by default; local embeddings now load it only when operators install the optional runtime package. Thanks @vincentkoc.
- Dependencies/media: replace the tiny core media host's Express server with `node:http`, so Express is no longer a root runtime dependency. Thanks @vincentkoc.
- Dependencies/media: move Sharp-backed image attachment processing into the bundled media understanding runtime so core no longer owns the native image parser dependency. Thanks @vincentkoc.
- Matrix: require full cross-signing identity trust for self-device verification and add `openclaw matrix verify self` so operators can establish that trust from the CLI. (#70401) Thanks @gumadeiras.
- WebChat/sessions: keep runtime-only prompt context out of visible transcript history and scrub legacy wrappers from session history surfaces. Thanks @91wan.
- Gradium: add a bundled text-to-speech provider with voice-note and telephony output support. (#64958) Thanks @LaurentMazare.

View File

@@ -0,0 +1,137 @@
import type { ImageMetadata } from "openclaw/plugin-sdk/media-runtime";
import type sharpImport from "sharp";
type SharpFactory = typeof sharpImport;
/** Input accepted by the `resizeToJpeg` image op. */
type ResizeToJpegParams = {
// Image bytes handed to sharp.
buffer: Buffer;
// Bounding-box edge: used as both the resize width and height with `fit: "inside"`.
maxSide: number;
// Forwarded to the JPEG encoder as its `quality` option.
quality: number;
// Enlargement is suppressed unless this is explicitly `false`.
withoutEnlargement?: boolean;
};
/** Input accepted by the `resizeToPng` image op. */
type ResizeToPngParams = {
// Image bytes handed to sharp.
buffer: Buffer;
// Bounding-box edge: used as both the resize width and height with `fit: "inside"`.
maxSide: number;
// Forwarded to the PNG encoder; the op falls back to 6 when omitted.
compressionLevel?: number;
// Enlargement is suppressed unless this is explicitly `false`.
withoutEnlargement?: boolean;
};
/** Options for `createMediaAttachmentImageOps`. */
type MediaUnderstandingImageOpsOptions = {
// Pixel budget: validated as a positive safe integer and passed to sharp as `limitInputPixels`.
maxInputPixels: number;
};
// Module specifier used by the dynamic import in loadSharp.
// NOTE(review): presumably held in a constant so the optional dependency is not statically resolved — confirm.
const SHARP_MODULE = "sharp";
// Cached promise for the loaded sharp factory; loadSharp resets it to null when loading fails so a later call can retry.
let sharpFactoryPromise: Promise<SharpFactory> | null = null;
/**
 * Extracts the callable sharp factory from a dynamically imported module value.
 *
 * Candidates are tried in this order: `mod.default`, `mod.default.default`,
 * then `mod` itself. The first one that is a function wins.
 *
 * @param mod - The value produced by the dynamic import.
 * @returns The callable sharp factory.
 * @throws Error when none of the candidates is a function.
 */
function normalizeSharpFactory(mod: unknown): SharpFactory {
  const isFactory = (value: unknown): value is SharpFactory => typeof value === "function";
  const outerDefault = (mod as { default?: unknown }).default;
  const nestedDefault = ((outerDefault ?? {}) as { default?: unknown }).default;
  for (const candidate of [outerDefault, nestedDefault, mod]) {
    if (isFactory(candidate)) {
      return candidate;
    }
  }
  throw new Error("Optional dependency sharp did not expose an image processor");
}
/**
 * Lazily loads the optional `sharp` dependency and returns a factory bound to
 * the caller's pixel budget.
 *
 * Only the raw sharp factory is cached module-wide. The `limitInputPixels`
 * binding is applied per call, so image-op instances created with different
 * budgets each get their own limit instead of inheriting the first caller's.
 *
 * @param maxInputPixels - Pixel budget forwarded to sharp as `limitInputPixels`.
 * @returns A sharp factory that always applies `failOnError: false` and the given budget.
 * @throws Error (with the underlying failure as `cause`) when sharp cannot be loaded.
 */
async function loadSharp(maxInputPixels: number): Promise<SharpFactory> {
  if (!sharpFactoryPromise) {
    sharpFactoryPromise = import(SHARP_MODULE)
      .then((mod) => normalizeSharpFactory(mod))
      .catch((err) => {
        // Drop the failed promise so a later call can retry the import.
        sharpFactoryPromise = null;
        throw new Error("Optional dependency sharp is required for image attachment processing", {
          cause: err,
        });
      });
  }
  const sharp = await sharpFactoryPromise;
  // Bind the budget here rather than inside the cached promise; caller options
  // are spread first so the safety settings below always win.
  return ((buffer, options) =>
    sharp(buffer, {
      ...options,
      failOnError: false,
      limitInputPixels: maxInputPixels,
    })) as SharpFactory;
}
/**
 * Validates the pixel budget supplied to the image ops factory.
 *
 * @param value - Candidate budget.
 * @returns `value` unchanged when it is a positive safe integer.
 * @throws Error when `value` is not a safe integer or is not greater than zero.
 */
function normalizeMaxInputPixels(value: number): number {
  const isPositiveSafeInteger = Number.isSafeInteger(value) && value > 0;
  if (isPositiveSafeInteger) {
    return value;
  }
  throw new Error("Media attachment image ops require a positive maxInputPixels budget");
}
/**
 * Reduces raw metadata to a `{ width, height }` pair.
 *
 * Missing dimensions are treated as 0.
 *
 * @param meta - Metadata that may carry `width` and `height`.
 * @returns The dimensions, or `null` unless both are finite and greater than zero.
 */
function normalizeMetadata(meta: { width?: number; height?: number }): ImageMetadata | null {
  const width = meta.width ?? 0;
  const height = meta.height ?? 0;
  const bothUsable = [width, height].every((side) => Number.isFinite(side) && side > 0);
  return bothUsable ? { width, height } : null;
}
/**
 * Builds the sharp-backed image operations used for media attachments.
 *
 * The pixel budget is validated once up front. Every operation then obtains
 * its sharp factory through `loadSharp` with that budget.
 *
 * @param options - Factory options; `maxInputPixels` must be a positive safe integer.
 * @returns An object exposing the metadata, orientation, resize and conversion ops.
 * @throws Error when `options.maxInputPixels` fails validation.
 */
export function createMediaAttachmentImageOps(options: MediaUnderstandingImageOpsOptions) {
  const maxInputPixels = normalizeMaxInputPixels(options.maxInputPixels);

  // Resize options shared by the JPEG and PNG paths: fit inside a
  // maxSide x maxSide box, never enlarging unless explicitly allowed.
  const fitInsideSquare = (params: { maxSide: number; withoutEnlargement?: boolean }) => ({
    width: params.maxSide,
    height: params.maxSide,
    fit: "inside" as const,
    withoutEnlargement: params.withoutEnlargement !== false,
  });

  return {
    /** Reads the image dimensions, or `null` when they are missing or invalid. */
    async getImageMetadata(buffer: Buffer): Promise<ImageMetadata | null> {
      const sharp = await loadSharp(maxInputPixels);
      const rawMetadata = await sharp(buffer).metadata();
      return normalizeMetadata(rawMetadata);
    },

    /** Re-encodes the image after an argument-less `rotate()`. */
    async normalizeExifOrientation(buffer: Buffer): Promise<Buffer> {
      const sharp = await loadSharp(maxInputPixels);
      const rotated = sharp(buffer).rotate();
      return await rotated.toBuffer();
    },

    /** Rotates, fits the image inside the square box, and encodes it as JPEG. */
    async resizeToJpeg(params: ResizeToJpegParams): Promise<Buffer> {
      const sharp = await loadSharp(maxInputPixels);
      const resized = sharp(params.buffer).rotate().resize(fitInsideSquare(params));
      return await resized.jpeg({ quality: params.quality, mozjpeg: true }).toBuffer();
    },

    /** Encodes the input as a quality-90 JPEG without resizing. */
    async convertHeicToJpeg(buffer: Buffer): Promise<Buffer> {
      const sharp = await loadSharp(maxInputPixels);
      const encoder = sharp(buffer).jpeg({ quality: 90, mozjpeg: true });
      return await encoder.toBuffer();
    },

    /** Reports whether metadata flags alpha or lists four channels. */
    async hasAlphaChannel(buffer: Buffer): Promise<boolean> {
      const sharp = await loadSharp(maxInputPixels);
      const metadata = await sharp(buffer).metadata();
      return metadata.hasAlpha || metadata.channels === 4;
    },

    /** Rotates, fits the image inside the square box, and encodes it as PNG. */
    async resizeToPng(params: ResizeToPngParams): Promise<Buffer> {
      const sharp = await loadSharp(maxInputPixels);
      // Compression level falls back to 6 when the caller does not specify one.
      const compressionLevel = params.compressionLevel ?? 6;
      const resized = sharp(params.buffer).rotate().resize(fitInsideSquare(params));
      return await resized.png({ compressionLevel }).toBuffer();
    },
  };
}

View File

@@ -4,7 +4,15 @@
"private": true,
"description": "OpenClaw media understanding runtime package",
"type": "module",
"dependencies": {
"sharp": "^0.34.5"
},
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*"
},
"openclaw": {
"bundle": {
"stageRuntimeDependencies": true
}
}
}

View File

@@ -1636,7 +1636,6 @@
"osc-progress": "^0.3.0",
"proxy-agent": "^8.0.1",
"semver": "7.7.4",
"sharp": "^0.34.5",
"sqlite-vec": "0.1.9",
"tar": "7.5.13",
"tslog": "^4.10.2",

7
pnpm-lock.yaml generated
View File

@@ -117,9 +117,6 @@ importers:
semver:
specifier: 7.7.4
version: 7.7.4
sharp:
specifier: ^0.34.5
version: 0.34.5
sqlite-vec:
specifier: 0.1.9
version: 0.1.9
@@ -799,6 +796,10 @@ importers:
version: link:../..
extensions/media-understanding-core:
dependencies:
sharp:
specifier: ^0.34.5
version: 0.34.5
devDependencies:
'@openclaw/plugin-sdk':
specifier: workspace:*

View File

@@ -164,8 +164,9 @@
"risk": ["version-parser"]
},
"sharp": {
"owner": "capability:image-ops",
"class": "default-runtime-initially",
"owner": "plugin:media-understanding-core",
"class": "plugin-runtime",
"activation": ["media-understanding-core.image-ops"],
"risk": ["native", "parser", "untrusted-files"]
},
"sqlite-vec": {
@@ -193,6 +194,11 @@
"class": "core-runtime",
"risk": ["network"]
},
"web-push": {
"owner": "core:web-push",
"class": "core-runtime",
"risk": ["network", "push-notifications", "crypto"]
},
"ws": {
"owner": "core:gateway-websocket",
"class": "core-runtime",

View File

@@ -41,6 +41,11 @@ function entryKeys(config: TsdownConfigEntry): string[] {
return Object.keys(config.entry);
}
/** Returns true when the config's entry map contains an `index` or `runtime-api` key. */
function hasBundledPluginRuntimeEntry(config: TsdownConfigEntry): boolean {
  return entryKeys(config).some((key) => key === "index" || key === "runtime-api");
}
/** Builds the `index` entry key located under the bundled plugin root for `pluginId`. */
function bundledEntry(pluginId: string): string {
  const pluginRoot = bundledPluginRoot(pluginId);
  return `${pluginRoot}/index`;
}
@@ -82,12 +87,19 @@ describe("tsdown config", () => {
);
expect(stagedGraphs.length).toBeGreaterThan(0);
expect(stagedGraphs.every((config) => entryKeys(config).includes("index"))).toBe(true);
expect(stagedGraphs.every(hasBundledPluginRuntimeEntry)).toBe(true);
expect(stagedGraphs.every((config) => !entryKeys(config).includes("plugin-sdk/index"))).toBe(
true,
);
expect(stagedGraphs.some((config) => config.outDir === "dist/extensions/discord")).toBe(true);
expect(stagedGraphs.some((config) => config.outDir === "dist/extensions/msteams")).toBe(true);
expect(
stagedGraphs.some(
(config) =>
config.outDir === "dist/extensions/media-understanding-core" &&
entryKeys(config).includes("image-ops"),
),
).toBe(true);
});
it("does not emit plugin-sdk or hooks from a separate dist graph", () => {

View File

@@ -3,16 +3,38 @@ import path from "node:path";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import { runExec } from "../process/exec.js";
type Sharp = typeof import("sharp");
type SharpFactory = (buffer: Buffer) => ReturnType<Sharp>;
export type ImageMetadata = {
width: number;
height: number;
};
type MediaAttachmentImageOps = {
getImageMetadata(buffer: Buffer): Promise<ImageMetadata | null>;
normalizeExifOrientation(buffer: Buffer): Promise<Buffer>;
resizeToJpeg(params: {
buffer: Buffer;
maxSide: number;
quality: number;
withoutEnlargement?: boolean;
}): Promise<Buffer>;
convertHeicToJpeg(buffer: Buffer): Promise<Buffer>;
hasAlphaChannel(buffer: Buffer): Promise<boolean>;
resizeToPng(params: {
buffer: Buffer;
maxSide: number;
compressionLevel?: number;
withoutEnlargement?: boolean;
}): Promise<Buffer>;
};
type MediaAttachmentImageOpsModule = {
createMediaAttachmentImageOps?: (options: { maxInputPixels: number }) => MediaAttachmentImageOps;
};
export const IMAGE_REDUCE_QUALITY_STEPS = [85, 75, 65, 55, 45, 35] as const;
export const MAX_IMAGE_INPUT_PIXELS = 25_000_000;
const MEDIA_UNDERSTANDING_CORE_PLUGIN_ID = "media-understanding-core";
const MEDIA_UNDERSTANDING_CORE_IMAGE_OPS_ARTIFACT = "image-ops.js";
export function buildImageResizeSideGrid(maxSide: number, sideStart: number): number[] {
return [sideStart, 1800, 1600, 1400, 1200, 1000, 800]
@@ -32,18 +54,47 @@ function prefersSips(): boolean {
);
}
let sharpFactoryPromise: Promise<SharpFactory> | null = null;
let mediaAttachmentImageOpsPromise: Promise<MediaAttachmentImageOps> | null = null;
async function loadSharp(): Promise<SharpFactory> {
sharpFactoryPromise ??= import("sharp").then((mod) => {
const sharp = (mod as unknown as { default?: Sharp }).default ?? (mod as unknown as Sharp);
return (buffer: Buffer) =>
sharp(buffer, {
failOnError: false,
limitInputPixels: MAX_IMAGE_INPUT_PIXELS,
/**
 * Type guard confirming that `value` is an object exposing every image-op
 * method as a function.
 *
 * @param value - Candidate returned by the plugin's factory.
 * @returns `true` only when all six ops are present and callable.
 */
function isMediaAttachmentImageOps(value: unknown): value is MediaAttachmentImageOps {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const requiredMethods: readonly (keyof MediaAttachmentImageOps)[] = [
    "getImageMetadata",
    "normalizeExifOrientation",
    "resizeToJpeg",
    "convertHeicToJpeg",
    "hasAlphaChannel",
    "resizeToPng",
  ];
  const candidate = value as Partial<Record<keyof MediaAttachmentImageOps, unknown>>;
  return requiredMethods.every((method) => typeof candidate[method] === "function");
}
async function loadMediaAttachmentImageOps(): Promise<MediaAttachmentImageOps> {
if (!mediaAttachmentImageOpsPromise) {
mediaAttachmentImageOpsPromise = Promise.resolve()
.then(async () => {
const { loadBundledPluginPublicArtifactModuleSync } =
await import("../plugins/public-surface-loader.js");
const mod = loadBundledPluginPublicArtifactModuleSync<MediaAttachmentImageOpsModule>({
dirName: MEDIA_UNDERSTANDING_CORE_PLUGIN_ID,
artifactBasename: MEDIA_UNDERSTANDING_CORE_IMAGE_OPS_ARTIFACT,
});
const ops = mod.createMediaAttachmentImageOps?.({
maxInputPixels: MAX_IMAGE_INPUT_PIXELS,
});
if (!isMediaAttachmentImageOps(ops)) {
throw new Error("Media understanding core did not expose image ops");
}
return ops;
})
.catch((err) => {
mediaAttachmentImageOpsPromise = null;
throw err;
});
});
return sharpFactoryPromise;
}
return await mediaAttachmentImageOpsPromise;
}
function isPositiveImageDimension(value: number): boolean {
@@ -409,17 +460,9 @@ export async function getImageMetadata(buffer: Buffer): Promise<ImageMetadata |
}
try {
const sharp = await loadSharp();
const meta = await sharp(buffer).metadata();
const width = meta.width ?? 0;
const height = meta.height ?? 0;
if (!Number.isFinite(width) || !Number.isFinite(height)) {
return null;
}
if (width <= 0 || height <= 0) {
return null;
}
return validateImagePixelLimit({ width, height });
const ops = await loadMediaAttachmentImageOps();
const meta = await ops.getImageMetadata(buffer);
return meta ? validateImagePixelLimit(meta) : null;
} catch {
return null;
}
@@ -492,11 +535,9 @@ export async function normalizeExifOrientation(buffer: Buffer): Promise<Buffer>
}
try {
const sharp = await loadSharp();
// .rotate() with no args auto-rotates based on EXIF orientation
return await sharp(buffer).rotate().toBuffer();
const ops = await loadMediaAttachmentImageOps();
return await ops.normalizeExifOrientation(buffer);
} catch {
// Sharp not available or failed - return original buffer
return buffer;
}
}
@@ -534,18 +575,8 @@ export async function resizeToJpeg(params: {
});
}
const sharp = await loadSharp();
// Use .rotate() BEFORE .resize() to auto-rotate based on EXIF orientation
return await sharp(params.buffer)
.rotate() // Auto-rotate based on EXIF before resizing
.resize({
width: params.maxSide,
height: params.maxSide,
fit: "inside",
withoutEnlargement: params.withoutEnlargement !== false,
})
.jpeg({ quality: params.quality, mozjpeg: true })
.toBuffer();
const ops = await loadMediaAttachmentImageOps();
return await ops.resizeToJpeg(params);
}
export async function convertHeicToJpeg(buffer: Buffer): Promise<Buffer> {
@@ -554,8 +585,8 @@ export async function convertHeicToJpeg(buffer: Buffer): Promise<Buffer> {
if (prefersSips()) {
return await sipsConvertToJpeg(buffer);
}
const sharp = await loadSharp();
return await sharp(buffer).jpeg({ quality: 90, mozjpeg: true }).toBuffer();
const ops = await loadMediaAttachmentImageOps();
return await ops.convertHeicToJpeg(buffer);
}
/**
@@ -566,12 +597,8 @@ export async function hasAlphaChannel(buffer: Buffer): Promise<boolean> {
await assertImagePixelLimit(buffer);
try {
const sharp = await loadSharp();
const meta = await sharp(buffer).metadata();
// Check if the image has an alpha channel
// PNG color types with alpha: 4 (grayscale+alpha), 6 (RGBA)
// Sharp reports this via 'channels' (4 = RGBA) or 'hasAlpha'
return meta.hasAlpha || meta.channels === 4;
const ops = await loadMediaAttachmentImageOps();
return await ops.hasAlphaChannel(buffer);
} catch {
return false;
}
@@ -579,7 +606,7 @@ export async function hasAlphaChannel(buffer: Buffer): Promise<boolean> {
/**
* Resizes an image to PNG format, preserving alpha channel (transparency).
* Falls back to sharp only (no sips fallback for PNG with alpha).
* Falls back to the media attachments plugin only (no sips fallback for PNG with alpha).
*/
export async function resizeToPng(params: {
buffer: Buffer;
@@ -589,20 +616,8 @@ export async function resizeToPng(params: {
}): Promise<Buffer> {
await assertImagePixelLimit(params.buffer);
const sharp = await loadSharp();
// Compression level 6 is a good balance (0=fastest, 9=smallest)
const compressionLevel = params.compressionLevel ?? 6;
return await sharp(params.buffer)
.rotate() // Auto-rotate based on EXIF if present
.resize({
width: params.maxSide,
height: params.maxSide,
fit: "inside",
withoutEnlargement: params.withoutEnlargement !== false,
})
.png({ compressionLevel })
.toBuffer();
const ops = await loadMediaAttachmentImageOps();
return await ops.resizeToPng(params);
}
export async function optimizeImageToPng(

View File

@@ -139,11 +139,12 @@ export function resolveBundledPluginsDir(env: NodeJS.ProcessEnv = process.env):
const preferSourceCheckout = Boolean(env.VITEST) || runningSourceTypeScriptProcess();
try {
const packageRoots = [
resolveOpenClawPackageRootSync({ argv1: process.argv[1] }),
resolveOpenClawPackageRootSync({ cwd: process.cwd() }),
resolveOpenClawPackageRootSync({ moduleUrl: import.meta.url }),
].filter(
const argvRoot = resolveOpenClawPackageRootSync({ argv1: process.argv[1] });
const cwdRoot = resolveOpenClawPackageRootSync({ cwd: process.cwd() });
const moduleRoot = resolveOpenClawPackageRootSync({ moduleUrl: import.meta.url });
const packageRoots = (
preferSourceCheckout ? [cwdRoot, argvRoot, moduleRoot] : [argvRoot, cwdRoot, moduleRoot]
).filter(
(entry, index, all): entry is string => Boolean(entry) && all.indexOf(entry) === index,
);
for (const packageRoot of packageRoots) {