rust-pickaxe 0.5.5

HTML data extraction library
Documentation
pub use htmd::HtmlToMarkdown;

use crate::errors::{PackageError, Result};


/// Render an HTML string as markdown using a caller-supplied converter.
///
/// * `html` - The HTML source text to convert.
/// * `converter` - The already-configured [`HtmlToMarkdown`] instance that
///   performs the conversion.
///
/// # Errors
///
/// Any failure reported by the converter is returned as
/// [`PackageError::UnknownError`] carrying the failure's message text.
pub fn html_to_markdown_with_converter(html: String, converter: HtmlToMarkdown) -> Result<String> {
    let markdown = converter
        .convert(&html)
        .map_err(|err| PackageError::UnknownError(err.to_string()))?;
    Ok(markdown)
}

/// Convert an HTML string to markdown using the default converter.
///
/// The default converter is built with `skip_tags` set to `script`, `style`
/// and `img`. To use a different configuration, call
/// [`html_to_markdown_with_converter`] with your own [`HtmlToMarkdown`].
///
/// * `html` - The HTML string to convert.
///
/// # Errors
///
/// Returns whatever error [`html_to_markdown_with_converter`] produces for
/// this input.
pub fn html_to_markdown(html: String) -> Result<String> {
    let converter = HtmlToMarkdown::builder()
        .skip_tags(vec!["script", "style", "img"])
        .build();

    // The delegate already returns `Result<String>`, so hand it back as-is
    // instead of unwrapping with `?` and re-wrapping in `Ok`.
    html_to_markdown_with_converter(html, converter)
}