fix(msteams): decode &amp; last in stripHtmlFromTeamsMessage to avoid double-decoding (#96342)

stripHtmlFromTeamsMessage decoded &amp; FIRST, so literal entity text the
user typed (which Microsoft Graph returns double-encoded, e.g. &amp;lt;) got
re-decoded into markup: "The token is &amp;lt;APIKEY&amp;gt;" became
"The token is <APIKEY>" instead of the correct "The token is &lt;APIKEY&gt;".

Reorder so &amp; is decoded last, mirroring the documented ordering in
decodeHtmlEntities (inbound.ts), whose comment already states it 'must be last
to prevent double-decoding (e.g. &amp;lt; -> &lt; not <)'. Behavior-preserving
for all singly-encoded input; the existing entity test is unchanged.

Co-authored-by: ly-wang19 <ly-wang19@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
ly-wang19
2026-06-24 21:34:40 +08:00
committed by GitHub
parent f3891e1335
commit 80bd0003ce
2 changed files with 14 additions and 3 deletions

View File

@@ -37,6 +37,15 @@ describe("stripHtmlFromTeamsMessage", () => {
);
});
it("does not double-decode escaped entities (decodes &amp; last)", () => {
// Graph encodes literally-typed entity text by escaping its '&' to '&amp;'.
// Decoding '&amp;' first would re-decode the now-bare '&lt;'/'&gt;' into
// angle brackets, corrupting the user's literal text.
expect(stripHtmlFromTeamsMessage("The token is &amp;lt;APIKEY&amp;gt;")).toBe(
"The token is &lt;APIKEY&gt;",
);
});
it("normalizes multiple whitespace to single space", () => {
expect(stripHtmlFromTeamsMessage("hello world")).toBe("hello world");
});

View File

@@ -35,14 +35,16 @@ export function stripHtmlFromTeamsMessage(html: string): string {
let text = html.replace(/<at[^>]*>(.*?)<\/at>/gi, "@$1");
// Strip remaining HTML tags.
text = text.replace(/<[^>]*>/g, " ");
// Decode common HTML entities.
// Decode common HTML entities. &amp; must be decoded LAST to prevent
// double-decoding (e.g. &amp;lt; → &lt; not <), matching decodeHtmlEntities
// in inbound.ts.
text = text
.replace(/&amp;/g, "&")
.replace(/&lt;/g, "<")
.replace(/&gt;/g, ">")
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
.replace(/&nbsp;/g, " ");
.replace(/&nbsp;/g, " ")
.replace(/&amp;/g, "&");
// Normalize whitespace.
return text.replace(/\s+/g, " ").trim();
}