use readable_readability::Readability;
/// Returns the base part of a `Content-Type` header value.
///
/// Everything from the first `;` onward is dropped, surrounding whitespace is
/// trimmed, and the remainder is lowercased (ASCII only). For example,
/// `"Text/HTML; charset=utf-8"` becomes `"text/html"`.
pub fn parse_content_type_base(header: &str) -> String {
    // `split` always yields at least one piece, so the fallback is never
    // actually taken; it just avoids an `unwrap`.
    let media_type = header.split(';').next().unwrap_or(header);
    media_type.trim().to_ascii_lowercase()
}
/// Reports whether `content_type_base` is exactly `text/html` or
/// `application/xhtml+xml`.
///
/// The comparison is exact and case-sensitive; no trimming or lowercasing is
/// done here.
pub fn is_html_like(content_type_base: &str) -> bool {
    matches!(content_type_base, "text/html" | "application/xhtml+xml")
}
/// Converts an HTML document to Markdown, preferring the readability-extracted
/// article over the raw page.
///
/// The document is first run through [`Readability`]. If the extracted node
/// serializes to non-blank UTF-8, that fragment is converted to Markdown,
/// prefixed with a `# <page title>` heading when the metadata carries a page
/// title, stripped of trailing whitespace, and terminated with a single
/// newline.
///
/// Returns `(markdown, used_fallback)`. `used_fallback` is `false` when the
/// extracted article was used, and `true` when extraction produced nothing
/// usable and the whole input `html` was converted instead.
///
/// `_url` is currently unused.
pub fn extract_markdown(html: &str, _url: &str) -> (String, bool) {
    let mut readability = Readability::new();
    let (article, meta) = readability.parse(html);

    // Serialize the extracted node; a write failure or invalid UTF-8 both mean
    // there is no usable fragment.
    let mut buf = Vec::new();
    let fragment = match article.serialize(&mut buf) {
        Ok(_) => String::from_utf8(buf).ok(),
        Err(_) => None,
    };

    match fragment.filter(|f| !f.trim().is_empty()) {
        Some(frag) => {
            let body = html2md::parse_html(&frag);
            let heading = match meta.page_title {
                Some(t) => format!("# {}\n\n", t),
                None => String::new(),
            };
            // Normalize the tail to exactly one trailing newline.
            let mut out = format!("{}{}", heading, body).trim_end().to_string();
            out.push('\n');
            (out, false)
        }
        // Nothing usable was extracted: convert the full document as-is.
        None => (html2md::parse_html(html), true),
    }
}