1use std::io::Cursor;
2
3use dom_smoothie::{Article, Config, Readability};
4use eyre::Result;
5use html2text::from_read;
6
7pub fn clean_html(html: &str) -> Result<String> {
11 let cfg = Config { max_elements_to_parse: 9000, ..Default::default() };
12 let mut readable = Readability::new(html, Some("http://example.com"), Some(cfg))?;
13 let article: Article = readable.parse()?;
14
15 let content = Cursor::new(article.content.as_bytes());
17 let text = from_read(content, 10000)?;
18
19 Ok(super::text::TextCleaner::new().reduce_newlines_to_double_newline().run(&text))
21}