import { describe, expect, it } from "vitest";
import { createReadabilityWebContentExtractor } from "./web-content-extractor.js";

/**
 * Minimal HTML fixture: a document with a `<title>`, a matching `<h1>`, and
 * two paragraphs of body text. The tests below assert on both the title and
 * the first paragraph, so the fixture needs real markup rather than bare text.
 */
const SAMPLE_HTML = `<!doctype html>
<html>
  <head>
    <title>Example Article</title>
  </head>
  <body>
    <article>
      <h1>Example Article</h1>
      <p>Main content starts here with enough words to satisfy readability.</p>
      <p>Second paragraph for a bit more signal.</p>
    </article>
  </body>
</html>
`;

/**
 * Resolved value of the extractor's `extract` method, derived from the
 * factory so the tests stay in sync with the implementation's signature.
 */
type ReadabilityResult = Awaited<
  ReturnType<ReturnType<typeof createReadabilityWebContentExtractor>["extract"]>
>;

/**
 * Narrows an extraction result to its non-nullish form.
 *
 * @param result - Value resolved by `extractor.extract(...)`.
 * @returns The same value, typed as non-nullable.
 * @throws Error when `result` is falsy, so a failed extraction surfaces as a
 *   clear test failure instead of a property access on a nullish value.
 */
function requireReadabilityResult(
  result: ReadabilityResult,
): NonNullable<ReadabilityResult> {
  if (!result) {
    throw new Error("expected readability extraction result");
  }
  return result;
}

describe("web readability extractor", () => {
  it("extracts readable text", async () => {
    const extractor = createReadabilityWebContentExtractor();
    const result = await extractor.extract({
      html: SAMPLE_HTML,
      url: "https://example.com/article",
      extractMode: "text",
    });

    const extracted = requireReadabilityResult(result);
    expect(extracted.text).toContain("Main content starts here");
    expect(extracted.title).toBe("Example Article");
  });

  it("extracts readable markdown", async () => {
    const extractor = createReadabilityWebContentExtractor();
    const result = await extractor.extract({
      html: SAMPLE_HTML,
      url: "https://example.com/article",
      extractMode: "markdown",
    });

    const extracted = requireReadabilityResult(result);
    expect(extracted.text).toContain("Main content starts here");
    expect(extracted.title).toBe("Example Article");
  });
});