fix(qqbot): support HTML entities in media tags (&lt; &gt;) (#60493)

* fix(qqbot): 支持媒体标签中的 HTML 实体(&lt; &gt;)

* fix(qqbot): support HTML entities in media tags

* test(qqbot): add unit tests for media tag regex with HTML entities

* test(qqbot): export regex constants to enable unit tests

* fix(qqbot): reset regex lastIndex in tests to avoid state pollution

* test(qqbot): add .js extension to import in media-tags.test.ts

* fix(qqbot): support HTML entities in media tags (#60493) (thanks @ylc0919)

---------

Co-authored-by: sliverp <870080352@qq.com>
This commit is contained in:
游乐场
2026-04-08 18:35:14 +08:00
committed by GitHub
parent 2fdeb7af96
commit 210ee4cfd2
3 changed files with 47 additions and 12 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- QQBot/media-tags: support HTML entity-encoded angle brackets (`&lt;`/`&gt;`) in media-tag regexes so entity-escaped `<qqimg>` tags from upstream are correctly parsed and normalized. (#60493) Thanks @ylc0919.
- Slack/media: preserve bearer auth across same-origin `files.slack.com` redirects while still stripping it on cross-origin Slack CDN hops, so `url_private_download` image attachments load again. (#62960) Thanks @vincentkoc.
- Control UI: guard stale session-history reloads during fast session switches so the selected session and rendered transcript stay in sync. (#62975) Thanks @scoootscooob.

View File

@@ -0,0 +1,32 @@
import { describe, it, expect } from 'vitest';
import { FUZZY_MEDIA_TAG_REGEX, SELF_CLOSING_TAG_REGEX } from './media-tags.js';
/**
 * Checks that the exported media-tag regexes accept angle brackets written
 * as HTML entities (`&lt;` / `&gt;`), alone or mixed with literal brackets,
 * and still reject text that contains no tag.
 */
describe('media-tags with HTML entities', () => {
  /**
   * Run `pattern` against `text` starting at index 0.
   * The regex constants are shared across cases, so `lastIndex` is zeroed
   * first to keep an earlier case's match position from leaking into this one.
   */
  const execFromStart = (pattern: RegExp, text: string): RegExpExecArray | null => {
    pattern.lastIndex = 0;
    return pattern.exec(text);
  };

  it('extracts URL from entity-encoded fuzzy tag', () => {
    const result = execFromStart(
      FUZZY_MEDIA_TAG_REGEX,
      '&lt;qqimg&gt;https://example.com/a.png&lt;/qqimg&gt;',
    );
    // Group 2 carries the text wrapped by the tag.
    expect(result?.[2]).toBe('https://example.com/a.png');
  });

  it('extracts URL from mixed entity+plain tag', () => {
    const result = execFromStart(
      FUZZY_MEDIA_TAG_REGEX,
      '&lt;qqimg&gt;https://example.com/b.png</qqimg>',
    );
    expect(result?.[2]).toBe('https://example.com/b.png');
  });

  it('extracts file from entity-encoded self-closing tag', () => {
    const result = execFromStart(
      SELF_CLOSING_TAG_REGEX,
      '&lt;qqmedia file="https://example.com/c.zip" /&gt;',
    );
    expect(result?.[2]).toBe('https://example.com/c.zip');
  });

  it('does not match invalid input', () => {
    const result = execFromStart(FUZZY_MEDIA_TAG_REGEX, 'no tag here');
    expect(result).toBeNull();
  });
});

View File

@@ -49,10 +49,12 @@ ALL_TAG_NAMES.sort((a, b) => b.length - a.length);
// Alternation of every tag name, used as a regex source fragment below.
const TAG_NAME_PATTERN = ALL_TAG_NAMES.join("|");
// Regex source fragment for an opening bracket: either a character from the
// bracket class or the HTML entity `&lt;`. Non-capturing, so it does not
// shift capture-group numbering in the patterns that embed it.
const LEFT_BRACKET = "(?:[<<]|&lt;)";
// Regex source fragment for a closing bracket: either a character from the
// bracket class or the HTML entity `&gt;`. Non-capturing as well.
const RIGHT_BRACKET = "(?:[>>]|&gt;)";
/** Match self-closing media-tag syntax with file/src/path/url attributes. */
const SELF_CLOSING_TAG_REGEX = new RegExp(
export const SELF_CLOSING_TAG_REGEX = new RegExp(
"`?" +
"[<<]\\s*(" +
LEFT_BRACKET + "\\s*(" +
TAG_NAME_PATTERN +
")" +
"(?:\\s+(?!file|src|path|url)[a-z_-]+\\s*=\\s*[\"']?[^\"'/>>]*?[\"']?)*" +
@@ -62,23 +64,23 @@ const SELF_CLOSING_TAG_REGEX = new RegExp(
"[\"']?" +
"(?:\\s+[a-z_-]+\\s*=\\s*[\"']?[^\"'/>>]*?[\"']?)*" +
"\\s*/?" +
"\\s*[>>]" +
"\\s*" + RIGHT_BRACKET +
"`?",
"gi",
);
/**
 * Match malformed wrapped media tags that should be normalized.
 *
 * Shape: optional backtick, opening tag, optional quote, body, optional
 * quote, closing tag (slash optional), optional backtick.
 *
 * Capture groups:
 *   1 - the tag name from the opening tag
 *   2 - the wrapped body (lazy; excludes brackets, quotes and backticks)
 * The tag name in the closing tag is non-capturing.
 *
 * Compiled with the `g` and `i` flags; because of `g`, `exec()`/`test()`
 * advance `lastIndex` on this shared instance.
 */
const FUZZY_MEDIA_TAG_REGEX = new RegExp(
export const FUZZY_MEDIA_TAG_REGEX = new RegExp(
"`?" +
"[<<]\\s*(" +
LEFT_BRACKET + "\\s*(" +
TAG_NAME_PATTERN +
")\\s*[>>]" +
")\\s*" + RIGHT_BRACKET +
"[\"']?\\s*" +
"([^<<>\"'`]+?)" +
"\\s*[\"']?" +
"[<<]\\s*/?\\s*(?:" +
LEFT_BRACKET + "\\s*/?\\s*(?:" +
TAG_NAME_PATTERN +
")\\s*[>>]" +
")\\s*" + RIGHT_BRACKET +
"`?",
"gi",
);
@@ -94,13 +96,13 @@ function resolveTagName(raw: string): (typeof VALID_TAGS)[number] {
/**
 * Match wrapped tags whose bodies need newline and tab cleanup.
 *
 * Capture groups:
 *   1 - the whole opening tag (bracket, tag name, bracket)
 *   2 - the body, matched lazily with `[\s\S]` so it can span line breaks
 *   3 - the whole closing tag (the slash is optional)
 * The tag-name alternations inside groups 1 and 3 are non-capturing.
 *
 * Compiled with the `g` and `i` flags.
 */
const MULTILINE_TAG_CLEANUP = new RegExp(
"([<<]\\s*(?:" +
"(" + LEFT_BRACKET + "\\s*(?:" +
TAG_NAME_PATTERN +
")\\s*[>>])" +
")\\s*" + RIGHT_BRACKET + ")" +
"([\\s\\S]*?)" +
"([<<]\\s*/?\\s*(?:" +
"(" + LEFT_BRACKET + "\\s*/?\\s*(?:" +
TAG_NAME_PATTERN +
")\\s*[>>])",
")\\s*" + RIGHT_BRACKET + ")",
"gi",
);