import { describe, expect, it } from "vitest";
import { createReadabilityWebContentExtractor } from "./web-content-extractor.js";
/**
 * Minimal article page fed to the extractor in the tests of this file.
 *
 * The document title and the top-level heading both read "Example Article",
 * and the body carries two paragraphs so there is real article content to
 * extract.
 */
const SAMPLE_HTML = `
<!doctype html>
<html>
  <head>
    <title>Example Article</title>
  </head>
  <body>
    <main>
      <article>
        <h1>Example Article</h1>
        <p>Main content starts here with enough words to satisfy readability.</p>
        <p>Second paragraph for a bit more signal.</p>
      </article>
    </main>
  </body>
</html>
`;
/**
 * Resolved value of the `extract` method on the extractor returned by
 * `createReadabilityWebContentExtractor`.
 *
 * Derived from the factory's own signature so it tracks the implementation
 * instead of redeclaring the shape here.
 */
type ReadabilityResult = Awaited<
  ReturnType<ReturnType<typeof createReadabilityWebContentExtractor>["extract"]>
>;
/**
 * Narrows an extraction result to its non-nullish form so tests can read its
 * fields without optional chaining.
 *
 * @param result - Value resolved by the extractor's `extract` call.
 * @returns The same value, typed as non-nullable.
 * @throws Error when `result` is falsy, i.e. the extractor produced nothing.
 */
function requireReadabilityResult(
  result: ReadabilityResult,
): NonNullable<ReadabilityResult> {
  if (!result) {
    throw new Error("expected readability extraction result");
  }
  return result;
}
describe("web readability extractor", () => {
  // Both extract modes are checked against the same expectations, so register
  // one test per mode instead of spelling each case out by hand.
  for (const extractMode of ["text", "markdown"] as const) {
    it(`extracts readable ${extractMode}`, async () => {
      const extraction = await createReadabilityWebContentExtractor().extract({
        html: SAMPLE_HTML,
        url: "https://example.com/article",
        extractMode,
      });

      // Fail loudly if the extractor returned nothing, before reading fields.
      const { text, title } = requireReadabilityResult(extraction);

      expect(text).toContain("Main content starts here");
      expect(title).toBe("Example Article");
    });
  }
});