import { describe, expect, it } from "vitest";
import { createReadabilityWebContentExtractor } from "./web-content-extractor.js";

/**
 * Minimal, well-formed HTML document used as the extraction fixture.
 *
 * The tests below assert on both the extracted title and the body text, so the
 * fixture carries real markup: a <title>/<h1> holding the expected title and an
 * <article> with two paragraphs of prose.
 */
const SAMPLE_HTML = `<!doctype html>
<html>
  <head>
    <title>Example Article</title>
  </head>
  <body>
    <main>
      <article>
        <h1>Example Article</h1>
        <p>Main content starts here with enough words to satisfy readability.</p>
        <p>Second paragraph for a bit more signal.</p>
      </article>
    </main>
  </body>
</html>`;

/** URL handed to the extractor alongside the fixture markup. */
const SAMPLE_URL = "https://example.com/article";

describe("web readability extractor", () => {
  // Both modes are expected to surface the same title and leading sentence, so
  // one parameterized case covers them; the generated test names are
  // "extracts readable text" and "extracts readable markdown".
  it.each(["text", "markdown"])("extracts readable %s", async (extractMode) => {
    const extractor = createReadabilityWebContentExtractor();

    const result = await extractor.extract({
      html: SAMPLE_HTML,
      url: SAMPLE_URL,
      extractMode,
    });

    // Fail with a clear message when nothing was extracted, rather than via an
    // assertion on `undefined` further down.
    expect(result).toBeTruthy();
    expect(result?.text).toContain("Main content starts here");
    expect(result?.title).toBe("Example Article");
  });
});