fix(msteams): decode &amp; last in stripHtmlFromTeamsMessage to avoid double-decoding (#96342)

stripHtmlFromTeamsMessage decoded &amp; FIRST, so literal entity text the user typed (which Microsoft Graph returns double-encoded, e.g. &amp;lt;) got re-decoded into markup: "The token is &amp;lt;APIKEY&amp;gt;" became "The token is <APIKEY>" instead of the correct "The token is &lt;APIKEY&gt;". Reorder so &amp; is decoded last, mirroring the documented ordering in decodeHtmlEntities (inbound.ts), whose comment already states it 'must be last to prevent double-decoding (e.g. &amp;lt; -> &lt; not <)'. Behavior-preserving for all singly-encoded input; the existing entity test is unchanged. Co-authored-by: ly-wang19 <ly-wang19@users.noreply.github.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-25 10:19:33 +00:00 · 2026-06-24 21:34:40 +08:00
parent f3891e1335
commit 80bd0003ce
2 changed files with 14 additions and 3 deletions
--- a/extensions/msteams/src/graph-thread.test.ts
+++ b/extensions/msteams/src/graph-thread.test.ts
@@ -37,6 +37,15 @@ describe("stripHtmlFromTeamsMessage", () => {
    );
  });

+  it("does not double-decode escaped entities (decodes &amp; last)", () => {
+    // Graph encodes literally-typed entity text by escaping its '&' to '&amp;'.
+    // Decoding '&amp;' first would re-decode the now-bare '&lt;'/'&gt;' into
+    // angle brackets, corrupting the user's literal text.
+    expect(stripHtmlFromTeamsMessage("The token is &amp;lt;APIKEY&amp;gt;")).toBe(
+      "The token is &lt;APIKEY&gt;",
+    );
+  });
+
  it("normalizes multiple whitespace to single space", () => {
    expect(stripHtmlFromTeamsMessage("hello   world")).toBe("hello world");
  });
--- a/extensions/msteams/src/graph-thread.ts
+++ b/extensions/msteams/src/graph-thread.ts
@@ -35,14 +35,16 @@ export function stripHtmlFromTeamsMessage(html: string): string {
  let text = html.replace(/<at[^>]*>(.*?)<\/at>/gi, "@$1");
  // Strip remaining HTML tags.
  text = text.replace(/<[^>]*>/g, " ");
-  // Decode common HTML entities.
+  // Decode common HTML entities. &amp; must be decoded LAST to prevent
+  // double-decoding (e.g. &amp;lt; → &lt; not <), matching decodeHtmlEntities
+  // in inbound.ts.
  text = text
-    .replace(/&amp;/g, "&")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
-    .replace(/&nbsp;/g, " ");
+    .replace(/&nbsp;/g, " ")
+    .replace(/&amp;/g, "&");
  // Normalize whitespace.
  return text.replace(/\s+/g, " ").trim();
 }