mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 20:00:42 +00:00
refactor(web-fetch): move readability extraction to plugin
* refactor(web-fetch): move readability extraction to plugin
* fix(web-fetch): cache extractor resolution by config
* fix(test): remove redundant stat assertions
This commit is contained in:
50
extensions/web-readability/web-content-extractor.test.ts
Normal file
50
extensions/web-readability/web-content-extractor.test.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createReadabilityWebContentExtractor } from "./web-content-extractor.js";
|
||||
|
||||
/**
 * Minimal HTML page used as the extraction fixture.
 *
 * The page wraps a single <article> (heading plus two paragraphs) in
 * navigation and footer markup, so the tests can check that the article
 * body and title are picked up from a page that also contains boilerplate.
 */
const SAMPLE_HTML = `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Example Article</title>
</head>
<body>
<nav>
<ul>
<li><a href="/home">Home</a></li>
<li><a href="/about">About</a></li>
</ul>
</nav>
<main>
<article>
<h1>Example Article</h1>
<p>Main content starts here with enough words to satisfy readability.</p>
<p>Second paragraph for a bit more signal.</p>
</article>
</main>
<footer>Footer text</footer>
</body>
</html>`;
|
||||
|
||||
/**
 * Tests for the extractor returned by createReadabilityWebContentExtractor.
 *
 * Each case feeds SAMPLE_HTML through `extract` and checks that the result
 * exposes the article's first paragraph in `text` and the page title in
 * `title`.
 */
describe("web readability extractor", () => {
  // One parameterized case per extract mode; `%s` keeps the original test
  // names ("extracts readable text" / "extracts readable markdown").
  it.each(["text", "markdown"] as const)("extracts readable %s", async (extractMode) => {
    const extractor = createReadabilityWebContentExtractor();

    const result = await extractor.extract({
      html: SAMPLE_HTML,
      url: "https://example.com/article",
      extractMode,
    });

    // `result` may be nullish; optional chaining turns that into a failed
    // expectation instead of a TypeError.
    expect(result?.text).toContain("Main content starts here");
    expect(result?.title).toBe("Example Article");
  });
});
|
||||
Reference in New Issue
Block a user