diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe066f4fd48..ae15b4601c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -97,6 +97,7 @@ Docs: https://docs.openclaw.ai
 - Google Meet: route stateful `google_meet` tool actions through the gateway-owned runtime so created or joined realtime sessions remain visible to status, speak, and leave after the agent turn ends. Fixes #72440. (#72441) Thanks @BsnizND.
 - Google Meet/Voice Call: send Gemini Live a non-blocking consult continuation before long OpenClaw agent consults finish, then deliver the final result when idle so calls and meetings do not sit silent during tool-backed answers. (#72189) Thanks @VACInc.
 - Google Meet: preserve Gemini Live function names when replying to realtime tool calls so Google SDK validation accepts the `FunctionResponse` payload. Fixes #72425. (#72426) Thanks @BsnizND.
+- Discord/media: keep incidental Markdown image badges in final replies as text unless a channel opts into Markdown-image media extraction, while preserving Telegram Markdown-image media replies and explicit `MEDIA:` attachments. Fixes #72642. Thanks @solavrc and @Bartok9.
 - Matrix/E2EE: stabilize recovery and broken-device QA flows while avoiding Matrix device-cleanup sync races that could leave shutdown-time crypto work running. Thanks @gumadeiras.
 - Cron: apply `cron.maxConcurrentRuns` to a dedicated `cron-nested` isolated agent-turn lane as well as cron dispatch, so parallel cron jobs no longer serialize on inner LLM execution while non-cron nested flows keep their existing lane behavior. Fixes #72707. Thanks @kagura-agent.
 - Cron: report isolated runs as successful when verified cron delivery already delivered the reply, while keeping unresolved Message/Canvas tool failures fatal. Fixes #72732 and #50170; follow-up to #54188. Thanks @zNatix, @pixeldyn, and @ChickenEggRoll.
diff --git a/docs/reference/rich-output-protocol.md b/docs/reference/rich-output-protocol.md
index c869dd20072..f31dd569f66 100644
--- a/docs/reference/rich-output-protocol.md
+++ b/docs/reference/rich-output-protocol.md
@@ -17,6 +17,10 @@ Remote `MEDIA:` attachments must be public `https:` URLs. Plain `http:`,
 loopback, link-local, private, and internal hostnames are ignored as attachment
 directives; server-side media fetchers still enforce their own network guards.
 
+Plain Markdown image syntax stays as text by default. Channels that intentionally
+map Markdown image replies to media attachments opt in on their outbound adapter
+(`extractMarkdownImages: true`); Telegram does this so `![alt](url)` can still become a media reply.
+
 These directives are separate. `MEDIA:` and reply/voice tags remain delivery metadata; `[embed ...]` is the web-only rich render path.
 Trusted tool-result media uses the same `MEDIA:` / `[[audio_as_voice]]` parser before delivery, so text tool outputs can still mark an audio attachment as a voice note.
 
diff --git a/extensions/telegram/src/outbound-adapter.ts b/extensions/telegram/src/outbound-adapter.ts
index 4fb8d0395cd..03e32a888b7 100644
--- a/extensions/telegram/src/outbound-adapter.ts
+++ b/extensions/telegram/src/outbound-adapter.ts
@@ -121,6 +121,7 @@ export const telegramOutbound: ChannelOutboundAdapter = {
   deliveryMode: "direct",
   chunker: markdownToTelegramHtmlChunks,
   chunkerMode: "markdown",
+  extractMarkdownImages: true,
   textChunkLimit: TELEGRAM_TEXT_CHUNK_LIMIT,
   sanitizeText: ({ text }) => sanitizeForPlainText(text),
   shouldSkipPlainTextSanitization: ({ payload }) => Boolean(payload.channelData),
diff --git a/extensions/telegram/src/outbound-base.ts b/extensions/telegram/src/outbound-base.ts
index e6793c8c880..a351874ee5c 100644
--- a/extensions/telegram/src/outbound-base.ts
+++ b/extensions/telegram/src/outbound-base.ts
@@ -4,6 +4,7 @@ export const telegramOutboundBaseAdapter = {
   deliveryMode: "direct" as const,
   chunker: chunkMarkdownText,
   chunkerMode: "markdown" as const,
+  extractMarkdownImages: true,
   textChunkLimit: 4000,
   pollMaxOptions: 10,
 };
diff --git a/src/auto-reply/reply/agent-runner-payloads.test.ts b/src/auto-reply/reply/agent-runner-payloads.test.ts
index c29fa559ddd..b26f1360539 100644
--- a/src/auto-reply/reply/agent-runner-payloads.test.ts
+++ b/src/auto-reply/reply/agent-runner-payloads.test.ts
@@ -350,6 +350,7 @@ describe("buildReplyPayloads media filter integration", () => {
   it("extracts markdown image replies into final payload media urls", async () => {
     const { replyPayloads } = await buildReplyPayloads({
       ...baseParams,
+      extractMarkdownImages: true,
       payloads: [{ text: "Here you go\n\n![chart](https://example.com/chart.png)" }],
     });
 
@@ -364,6 +365,7 @@ describe("buildReplyPayloads media filter integration", () => {
   it("preserves inline caption text when lifting markdown image replies into media", async () => {
     const { replyPayloads } = await buildReplyPayloads({
       ...baseParams,
+      extractMarkdownImages: true,
       payloads: [{ text: 'Look ![chart](https://example.com/chart.png "Quarterly chart") now' }],
     });
 
@@ -379,6 +381,7 @@ describe("buildReplyPayloads media filter integration", () => {
     const text = "Look ![chart](file:///etc/passwd) now";
     const { replyPayloads } = await buildReplyPayloads({
       ...baseParams,
+      extractMarkdownImages: true,
       payloads: [{ text }],
     });
 
diff --git a/src/auto-reply/reply/agent-runner-payloads.ts b/src/auto-reply/reply/agent-runner-payloads.ts
index 1f6e7d2e000..4806f7b2228 100644
--- a/src/auto-reply/reply/agent-runner-payloads.ts
+++ b/src/auto-reply/reply/agent-runner-payloads.ts
@@ -107,6 +107,7 @@ export async function buildReplyPayloads(params: {
   originatingChannel?: OriginatingChannelType;
   originatingTo?: string;
   accountId?: string;
+  extractMarkdownImages?: boolean;
   normalizeMediaPaths?: (payload: ReplyPayload) => Promise<ReplyPayload>;
 }): Promise<{ replyPayloads: ReplyPayload[]; didLogHeartbeatStrip: boolean }> {
   let didLogHeartbeatStrip = params.didLogHeartbeatStrip;
@@ -148,6 +149,7 @@ export async function buildReplyPayloads(params: {
           currentMessageId: params.currentMessageId,
           silentToken: SILENT_REPLY_TOKEN,
           parseMode: "always",
+          extractMarkdownImages: params.extractMarkdownImages,
         });
         const mediaNormalizedPayload = await normalizeReplyPayloadMedia({
           payload: parsed.payload,
diff --git a/src/auto-reply/reply/reply-delivery.ts b/src/auto-reply/reply/reply-delivery.ts
index c20c6ec873d..9a428e16ec5 100644
--- a/src/auto-reply/reply/reply-delivery.ts
+++ b/src/auto-reply/reply/reply-delivery.ts
@@ -17,6 +17,7 @@ export function normalizeReplyPayloadDirectives(params: {
   silentToken?: string;
   trimLeadingWhitespace?: boolean;
   parseMode?: ReplyDirectiveParseMode;
+  extractMarkdownImages?: boolean;
 }): { payload: ReplyPayload; isSilent: boolean } {
   const parseMode = params.parseMode ?? "always";
   const silentToken = params.silentToken ?? SILENT_REPLY_TOKEN;
@@ -27,12 +28,14 @@ export function normalizeReplyPayloadDirectives(params: {
     (parseMode === "auto" &&
       (sourceText.includes("[[") ||
         /media:/i.test(sourceText) ||
+        (params.extractMarkdownImages === true && /!\[[^\]]*]\(/.test(sourceText)) ||
         sourceText.includes(silentToken)));
 
   const parsed = shouldParse
     ? parseReplyDirectives(sourceText, {
         currentMessageId: params.currentMessageId,
         silentToken,
+        extractMarkdownImages: params.extractMarkdownImages,
       })
     : undefined;
 
diff --git a/src/auto-reply/reply/reply-directives.ts b/src/auto-reply/reply/reply-directives.ts
index 14faf7e4e1d..586cda7c353 100644
--- a/src/auto-reply/reply/reply-directives.ts
+++ b/src/auto-reply/reply/reply-directives.ts
@@ -13,11 +13,19 @@ export type ReplyDirectiveParseResult = {
   isSilent: boolean;
 };
 
+export type ReplyDirectiveParseOptions = { // Options accepted by parseReplyDirectives.
+  currentMessageId?: string;
+  silentToken?: string;
+  extractMarkdownImages?: boolean; // Forwarded by parseReplyDirectives to splitMediaFromOutput as its extractMarkdownImages option.
+};
+
 export function parseReplyDirectives(
   raw: string,
-  options: { currentMessageId?: string; silentToken?: string } = {},
+  options: ReplyDirectiveParseOptions = {},
 ): ReplyDirectiveParseResult {
-  const split = splitMediaFromOutput(raw);
+  const split = splitMediaFromOutput(raw, {
+    extractMarkdownImages: options.extractMarkdownImages,
+  });
   let text = split.text ?? "";
 
   const replyParsed = parseInlineDirectives(text, {
diff --git a/src/channels/plugins/outbound.types.ts b/src/channels/plugins/outbound.types.ts
index acc8425f2a8..75c381848b1 100644
--- a/src/channels/plugins/outbound.types.ts
+++ b/src/channels/plugins/outbound.types.ts
@@ -76,6 +76,8 @@ export type ChannelOutboundAdapter = {
   deliveryMode: "direct" | "gateway" | "hybrid";
   chunker?: ((text: string, limit: number, ctx?: ChannelOutboundChunkContext) => string[]) | null;
   chunkerMode?: "text" | "markdown";
+  /** When true, lift remote Markdown image syntax in text into outbound media attachments; otherwise it stays as text. */
+  extractMarkdownImages?: boolean;
   textChunkLimit?: number;
   sanitizeText?: (params: { text: string; payload: ReplyPayload }) => string;
   pollMaxOptions?: number;
diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts
index 8bc2e980f56..26110e9a482 100644
--- a/src/infra/outbound/deliver.test.ts
+++ b/src/infra/outbound/deliver.test.ts
@@ -1235,6 +1235,54 @@ describe("deliverOutboundPayloads", () => {
     );
   });
 
+  it("keeps markdown images as text for channels that do not opt in", async () => {
+    const sendMatrix = vi.fn().mockResolvedValue({ messageId: "m-text", roomId: "!room" });
+
+    await deliverOutboundPayloads({
+      cfg: matrixChunkConfig,
+      channel: "matrix",
+      to: "!room:example",
+      payloads: [{ text: "Tech: ![Node.js](https://img.shields.io/badge/Node.js-339933)" }],
+      deps: { matrix: sendMatrix },
+    });
+
+    expect(sendMatrix).toHaveBeenCalledWith(
+      "!room:example",
+      "Tech: ![Node.js](https://img.shields.io/badge/Node.js-339933)",
+      expect.not.objectContaining({ mediaUrl: expect.any(String) }),
+    );
+  });
+
+  it("extracts markdown images for channels that opt in", async () => {
+    const sendMatrix = vi.fn().mockResolvedValue({ messageId: "m-media", roomId: "!room" });
+    setActivePluginRegistry( // NOTE(review): replaces the active plugin registry for this test; confirm suite setup restores it so later tests are unaffected.
+      createTestRegistry([
+        {
+          pluginId: "matrix",
+          source: "test",
+          plugin: createOutboundTestPlugin({
+            id: "matrix",
+            outbound: { ...matrixOutboundForTest, extractMarkdownImages: true }, // Same test adapter, but opted in to Markdown-image extraction.
+          }),
+        },
+      ]),
+    );
+
+    await deliverOutboundPayloads({
+      cfg: matrixChunkConfig,
+      channel: "matrix",
+      to: "!room:example",
+      payloads: [{ text: "Chart ![chart](https://example.com/chart.png) now" }],
+      deps: { matrix: sendMatrix },
+    });
+
+    expect(sendMatrix).toHaveBeenCalledWith(
+      "!room:example",
+      "Chart now", // The image syntax is removed from the delivered text...
+      expect.objectContaining({ mediaUrl: "https://example.com/chart.png" }), // ...and its URL is delivered as media instead.
+    );
+  });
+
   it("normalizes payloads and drops empty entries", () => {
     const normalized = normalizeOutboundPayloads([
       { text: "hi" },
diff --git a/src/infra/outbound/deliver.ts b/src/infra/outbound/deliver.ts
index 0724ee5390c..4ea06eac070 100644
--- a/src/infra/outbound/deliver.ts
+++ b/src/infra/outbound/deliver.ts
@@ -147,6 +147,24 @@ type ChannelHandlerParams = {
 };
 
 // Channel docking: outbound delivery delegates to plugin.outbound adapters.
+async function resolveChannelOutboundDirectiveOptions(params: { // Reads reply-directive parsing options off the channel's outbound adapter.
+  cfg: OpenClawConfig;
+  channel: Exclude<OutboundChannel, "none">;
+}): Promise<{ extractMarkdownImages?: boolean }> {
+  let outbound = await loadChannelOutboundAdapter(params.channel); // NOTE(review): appears to mirror the lookup at the top of createChannelHandler; confirm and consider sharing it.
+  if (!outbound) { // No adapter yet: bootstrap the channel plugin, then retry the lookup once.
+    const { bootstrapOutboundChannelPlugin } = await loadChannelBootstrapRuntime();
+    bootstrapOutboundChannelPlugin({
+      channel: params.channel,
+      cfg: params.cfg,
+    });
+    outbound = await loadChannelOutboundAdapter(params.channel);
+  }
+  return {
+    extractMarkdownImages: outbound?.extractMarkdownImages === true ? true : undefined, // Only a literal `true` opts in; anything else, including a still-missing adapter, yields undefined.
+  };
+}
+
 async function createChannelHandler(params: ChannelHandlerParams): Promise<ChannelHandler> {
   let outbound = await loadChannelOutboundAdapter(params.channel);
   if (!outbound) {
@@ -841,11 +859,13 @@ async function deliverOutboundPayloadsCore(
   params: DeliverOutboundPayloadsCoreParams,
 ): Promise<OutboundDeliveryResult[]> {
   const { cfg, channel, to, payloads } = params;
+  const directiveOptions = await resolveChannelOutboundDirectiveOptions({ cfg, channel });
   const outboundPayloadPlan = createOutboundPayloadPlan(payloads, {
     cfg,
     sessionKey: params.session?.policyKey ?? params.session?.key,
     surface: channel,
     conversationType: params.session?.conversationType,
+    extractMarkdownImages: directiveOptions.extractMarkdownImages,
   });
   const accountId = params.accountId;
   const deps = params.deps;
diff --git a/src/infra/outbound/payloads.test.ts b/src/infra/outbound/payloads.test.ts
index cb0785ef187..97b06015e5f 100644
--- a/src/infra/outbound/payloads.test.ts
+++ b/src/infra/outbound/payloads.test.ts
@@ -642,6 +642,44 @@ describe("OutboundPayloadPlan projections", () => {
     const plan = createOutboundPayloadPlan(matrix);
     expect(projectOutboundPayloadPlanForMirror(plan)).toEqual(resolveMirrorProjection(matrix));
   });
+
+  it("keeps markdown images as text unless extraction is enabled", () => {
+    const input = "Tech: ![Node.js](https://img.shields.io/badge/Node.js-339933)";
+
+    expect(
+      projectOutboundPayloadPlanForDelivery(createOutboundPayloadPlan([{ text: input }])),
+    ).toEqual([
+      {
+        text: input,
+        mediaUrl: undefined,
+        mediaUrls: undefined,
+        replyToId: undefined,
+        replyToCurrent: undefined,
+        replyToTag: false,
+        audioAsVoice: false,
+      },
+    ]);
+  });
+
+  it("extracts markdown images when the outbound channel opts in", () => {
+    const input = "Chart ![chart](https://example.com/chart.png) now";
+
+    expect(
+      projectOutboundPayloadPlanForDelivery(
+        createOutboundPayloadPlan([{ text: input }], { extractMarkdownImages: true }),
+      ),
+    ).toEqual([
+      {
+        text: "Chart now",
+        mediaUrl: "https://example.com/chart.png",
+        mediaUrls: ["https://example.com/chart.png"],
+        replyToId: undefined,
+        replyToCurrent: undefined,
+        replyToTag: false,
+        audioAsVoice: false,
+      },
+    ]);
+  });
 });
 
 describe("formatOutboundPayloadLog", () => {
diff --git a/src/infra/outbound/payloads.ts b/src/infra/outbound/payloads.ts
index c9a0fed521b..a67190a51bb 100644
--- a/src/infra/outbound/payloads.ts
+++ b/src/infra/outbound/payloads.ts
@@ -67,6 +67,7 @@ type OutboundPayloadPlanContext = {
    * (see `pending-spawn-query.ts`).
    */
   hasPendingSpawnedChildren?: boolean;
+  extractMarkdownImages?: boolean;
 };
 
 export type OutboundPayloadMirror = {
@@ -131,11 +132,14 @@ type PreparedOutboundPayloadPlanEntry = {
 
 function createOutboundPayloadPlanEntry(
   payload: ReplyPayload,
+  context: Pick<OutboundPayloadPlanContext, "extractMarkdownImages"> = {},
 ): PreparedOutboundPayloadPlanEntry | null {
   if (shouldSuppressReasoningPayload(payload)) {
     return null;
   }
-  const parsed = parseReplyDirectives(payload.text ?? "");
+  const parsed = parseReplyDirectives(payload.text ?? "", {
+    extractMarkdownImages: context.extractMarkdownImages,
+  });
   const explicitMediaUrls = payload.mediaUrls ?? parsed.mediaUrls;
   const explicitMediaUrl = payload.mediaUrl ?? parsed.mediaUrl;
   const mergedMedia = mergeMediaUrls(
@@ -193,7 +197,9 @@ export function createOutboundPayloadPlan(
     context.hasPendingSpawnedChildren ?? resolvePendingSpawnedChildren(context.sessionKey);
   const prepared: PreparedOutboundPayloadPlanEntry[] = [];
   for (const payload of payloads) {
-    const entry = createOutboundPayloadPlanEntry(payload);
+    const entry = createOutboundPayloadPlanEntry(payload, {
+      extractMarkdownImages: context.extractMarkdownImages,
+    });
     if (!entry) {
       continue;
     }
diff --git a/src/media/parse.test.ts b/src/media/parse.test.ts
index 7ac1f2b3d59..a01f931a2a7 100644
--- a/src/media/parse.test.ts
+++ b/src/media/parse.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from "vitest";
-import { splitMediaFromOutput } from "./parse.js";
+import { splitMediaFromOutput, type SplitMediaFromOutputOptions } from "./parse.js";
 
 describe("splitMediaFromOutput", () => {
   function expectParsedMediaOutputCase(
@@ -9,8 +9,9 @@ describe("splitMediaFromOutput", () => {
       text?: string;
       audioAsVoice?: boolean;
     },
+    options?: SplitMediaFromOutputOptions,
   ) {
-    const result = splitMediaFromOutput(input);
+    const result = splitMediaFromOutput(input, options);
     expect(result.text).toBe(expected.text ?? "");
     if ("audioAsVoice" in expected) {
       expect(result.audioAsVoice).toBe(expected.audioAsVoice);
@@ -126,18 +127,36 @@ describe("splitMediaFromOutput", () => {
     ]);
   });
 
-  it("extracts markdown image urls while keeping surrounding caption text", () => {
-    expectParsedMediaOutputCase("Caption\n\n![chart](https://example.com/chart.png)", {
-      text: "Caption",
-      mediaUrls: ["https://example.com/chart.png"],
+  const extractMarkdownImages = { extractMarkdownImages: true } as const;
+
+  it("keeps markdown image urls as text by default", () => {
+    const input = "Caption\n\n![chart](https://example.com/chart.png)";
+    expectParsedMediaOutputCase(input, {
+      text: input,
+      mediaUrls: undefined,
     });
   });
 
-  it("keeps inline caption text around markdown images", () => {
-    expectParsedMediaOutputCase("Look ![chart](https://example.com/chart.png) now", {
-      text: "Look now",
-      mediaUrls: ["https://example.com/chart.png"],
-    });
+  it("extracts markdown image urls while keeping surrounding caption text when enabled", () => {
+    expectParsedMediaOutputCase(
+      "Caption\n\n![chart](https://example.com/chart.png)",
+      {
+        text: "Caption",
+        mediaUrls: ["https://example.com/chart.png"],
+      },
+      extractMarkdownImages,
+    );
+  });
+
+  it("keeps inline caption text around markdown images when enabled", () => {
+    expectParsedMediaOutputCase(
+      "Look ![chart](https://example.com/chart.png) now",
+      {
+        text: "Look now",
+        mediaUrls: ["https://example.com/chart.png"],
+      },
+      extractMarkdownImages,
+    );
   });
 
   it("extracts multiple markdown image urls in order", () => {
@@ -147,6 +166,7 @@ describe("splitMediaFromOutput", () => {
         text: "Before\nMiddle\nAfter",
         mediaUrls: ["https://example.com/one.png", "https://example.com/two.png"],
       },
+      extractMarkdownImages,
     );
   });
 
@@ -157,14 +177,19 @@ describe("splitMediaFromOutput", () => {
         text: "Caption",
         mediaUrls: ["https://example.com/chart.png"],
       },
+      extractMarkdownImages,
     );
   });
 
   it("keeps balanced parentheses inside markdown image urls", () => {
-    expectParsedMediaOutputCase("Chart ![img](https://example.com/a_(1).png) now", {
-      text: "Chart now",
-      mediaUrls: ["https://example.com/a_(1).png"],
-    });
+    expectParsedMediaOutputCase(
+      "Chart ![img](https://example.com/a_(1).png) now",
+      {
+        text: "Chart now",
+        mediaUrls: ["https://example.com/a_(1).png"],
+      },
+      extractMarkdownImages,
+    );
   });
 
   it.each([
@@ -174,27 +199,76 @@ describe("splitMediaFromOutput", () => {
     "![x](http://example.com/a.png)",
     "![x](https://127.0.0.1/a.png)",
   ] as const)("does not lift local markdown image target: %s", (input) => {
-    expectParsedMediaOutputCase(input, {
-      text: input,
-      mediaUrls: undefined,
-    });
+    expectParsedMediaOutputCase(
+      input,
+      {
+        text: input,
+        mediaUrls: undefined,
+      },
+      extractMarkdownImages,
+    );
   });
 
   it("does not lift markdown image urls that fail media validation", () => {
     const longUrl = `![x](https://example.com/${"a".repeat(4097)}.png)`;
 
-    expectParsedMediaOutputCase(longUrl, {
-      text: longUrl,
-      mediaUrls: undefined,
-    });
+    expectParsedMediaOutputCase(
+      longUrl,
+      {
+        text: longUrl,
+        mediaUrls: undefined,
+      },
+      extractMarkdownImages,
+    );
   });
 
   it("leaves very long markdown-image candidate lines as text", () => {
     const input = `${"prefix ".repeat(3000)}![x](https://example.com/image.png)`;
 
+    expectParsedMediaOutputCase(
+      input,
+      {
+        text: input,
+        mediaUrls: undefined,
+      },
+      extractMarkdownImages,
+    );
+  });
+
+  it.each([
+    "![Node.js](https://img.shields.io/badge/Node.js-339933?logo=node.js&logoColor=white)",
+    "![build](https://img.shields.io/github/actions/workflow/status/owner/repo/ci.yml)",
+    "![npm](https://badge.fury.io/js/some-package.svg)",
+    "![badgen](https://badgen.net/npm/v/some-package)",
+    "![CI](https://github.com/owner/repo/actions/workflows/ci.yml/badge.svg)",
+    "![flat-badge](https://flat.badgen.net/npm/v/some-package)",
+  ] as const)("keeps markdown badge image as text by default: %s", (input) => {
     expectParsedMediaOutputCase(input, {
       text: input,
       mediaUrls: undefined,
     });
   });
+
+  it("keeps surrounding text around inline badge images by default", () => {
+    expectParsedMediaOutputCase(
+      "tech: ![Node.js](https://img.shields.io/badge/Node.js-339933?logo=node.js&logoColor=white) stack",
+      {
+        text: "tech: ![Node.js](https://img.shields.io/badge/Node.js-339933?logo=node.js&logoColor=white) stack",
+        mediaUrls: undefined,
+      },
+    );
+  });
+
+  it("still extracts markdown images when explicitly enabled", () => {
+    expectParsedMediaOutputCase(
+      "![badge](https://img.shields.io/badge/status-passing-green)\n![photo](https://example.com/photo.png)",
+      {
+        mediaUrls: [
+          "https://img.shields.io/badge/status-passing-green",
+          "https://example.com/photo.png",
+        ],
+      },
+      extractMarkdownImages,
+    );
+  });
 });
diff --git a/src/media/parse.ts b/src/media/parse.ts
index c725c191370..8d832447bda 100644
--- a/src/media/parse.ts
+++ b/src/media/parse.ts
@@ -26,6 +26,10 @@ export type ParsedMediaOutputSegment =
       url: string;
     };
 
+export type SplitMediaFromOutputOptions = {
+  extractMarkdownImages?: boolean; // Markdown image syntax is considered for media extraction only when this is exactly true; otherwise it stays in the text.
+};
+
 export function normalizeMediaSource(src: string) {
   return src.startsWith("file://") ? src.replace("file://", "") : src;
 }
@@ -462,7 +466,10 @@ function isInsideFence(fenceSpans: Array<{ start: number; end: number }>, offset
   return fenceSpans.some((span) => offset >= span.start && offset < span.end);
 }
 
-export function splitMediaFromOutput(raw: string): {
+export function splitMediaFromOutput(
+  raw: string,
+  options: SplitMediaFromOutputOptions = {},
+): {
   text: string;
   mediaUrls?: string[];
   mediaUrl?: string; // legacy first item for backward compatibility
@@ -475,8 +482,9 @@ export function splitMediaFromOutput(raw: string): {
   if (!trimmedRaw.trim()) {
     return { text: "" };
   }
+  const extractMarkdownImages = options.extractMarkdownImages === true;
   const mayContainMediaToken = /media:/i.test(trimmedRaw);
-  const mayContainMarkdownImage = /!\[[^\]]*]\(/.test(trimmedRaw);
+  const mayContainMarkdownImage = extractMarkdownImages && /!\[[^\]]*]\(/.test(trimmedRaw);
   const mayContainAudioTag = trimmedRaw.includes("[[");
   if (!mayContainMediaToken && !mayContainMarkdownImage && !mayContainAudioTag) {
     return { text: trimmedRaw };
@@ -518,7 +526,9 @@ export function splitMediaFromOutput(raw: string): {
 
     const trimmedStart = line.trimStart();
     if (!trimmedStart.toUpperCase().startsWith("MEDIA:")) {
-      const markdownImageResult = collectMarkdownImageSegments({ line, media });
+      const markdownImageResult = extractMarkdownImages
+        ? collectMarkdownImageSegments({ line, media })
+        : { lineSegments: [], foundMedia: false };
       if (!markdownImageResult.foundMedia) {
         keptLines.push(line);
         pushTextSegment(line);