Files
openclaw/extensions/web-readability/web-content-extractor.test.ts
Vincent Koc 86099ec62a refactor(web-fetch): move readability extraction to plugin
* refactor(web-fetch): move readability extraction to plugin

* fix(web-fetch): cache extractor resolution by config

* fix(test): remove redundant stat assertions
2026-04-24 13:34:37 -07:00

51 lines
1.5 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { createReadabilityWebContentExtractor } from "./web-content-extractor.js";
/**
 * HTML fixture fed to the extractor in the tests below.
 *
 * The document has a <title>, a <nav> with two links, a <main>/<article>
 * holding an <h1> and two paragraphs, and a <footer>. Lines are joined with
 * a single "\n" and there is no trailing newline.
 */
const SAMPLE_HTML = [
  "<!doctype html>",
  '<html lang="en">',
  "<head>",
  '<meta charset="utf-8" />',
  "<title>Example Article</title>",
  "</head>",
  "<body>",
  "<nav>",
  "<ul>",
  '<li><a href="/home">Home</a></li>',
  '<li><a href="/about">About</a></li>',
  "</ul>",
  "</nav>",
  "<main>",
  "<article>",
  "<h1>Example Article</h1>",
  "<p>Main content starts here with enough words to satisfy readability.</p>",
  "<p>Second paragraph for a bit more signal.</p>",
  "</article>",
  "</main>",
  "<footer>Footer text</footer>",
  "</body>",
  "</html>",
].join("\n");
// Exercises the readability-backed extractor against SAMPLE_HTML in both
// supported output modes used here ("text" and "markdown").
describe("web readability extractor", () => {
  /**
   * Creates a fresh extractor and runs it over SAMPLE_HTML with the given
   * extract mode, using a fixed example.com article URL.
   */
  const extractSample = (extractMode: "text" | "markdown") =>
    createReadabilityWebContentExtractor().extract({
      html: SAMPLE_HTML,
      url: "https://example.com/article",
      extractMode,
    });

  it("extracts readable text", async () => {
    const extracted = await extractSample("text");

    // The first article paragraph must appear in the extracted text.
    expect(extracted?.text).toContain("Main content starts here");
    expect(extracted?.title).toBe("Example Article");
  });

  it("extracts readable markdown", async () => {
    const extracted = await extractSample("markdown");

    // Same content expectations as text mode.
    expect(extracted?.text).toContain("Main content starts here");
    expect(extracted?.title).toBe("Example Article");
  });
});