// pter 0.1.0
//
// Plain Text Email Renderer — convert HTML email bodies into readable markdown.
// Documentation
use scraper::node::Element;

/// What kind of markdown wrapper an element produces.
///
/// This is the result of [`classify`]. All three enums in this family are
/// small, payload-light values, so they derive `Debug`, `Clone`, `Copy`,
/// `PartialEq` and `Eq` to allow logging, cheap passing by value and direct
/// comparison with `==` / `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElementAction {
    /// Skip this element and all its children entirely.
    Skip,
    /// Render children only, no wrapper (transparent element).
    Transparent,
    /// Block element with specific rendering.
    Block(BlockKind),
    /// Inline element with specific rendering.
    Inline(InlineKind),
}

/// Block-level constructs that [`classify`] can assign to an element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockKind {
    /// `<p>`.
    Paragraph,
    /// `<h1>`–`<h6>`; the payload is the heading level (1 through 6).
    Heading(u8),
    /// `<blockquote>`.
    Blockquote,
    /// `<ul>` and `<menu>`.
    UnorderedList,
    /// `<ol>`.
    OrderedList,
    /// `<li>`.
    ListItem,
    /// `<pre>`.
    PreFormatted,
    /// `<hr>`.
    HorizontalRule,
    /// `<table>`; its sub-elements (`<tr>`, `<td>`, ...) are classified as
    /// [`ElementAction::Transparent`] rather than getting kinds of their own.
    Table,
    /// Generic containers: `<div>`, `<section>`, `<article>`, `<main>`,
    /// `<header>`, `<footer>`, `<nav>`, `<aside>`, `<figure>`,
    /// `<figcaption>`, `<details>` and `<summary>`.
    Div,
}

/// Inline-level constructs that [`classify`] can assign to an element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InlineKind {
    /// `<strong>` and `<b>`.
    Bold,
    /// `<em>` and `<i>`.
    Italic,
    /// `<del>`, `<s>` and `<strike>`.
    Strikethrough,
    /// `<code>` and `<tt>`.
    Code,
    /// `<a>`.
    Link,
    /// `<img>`.
    Image,
    /// `<br>`.
    LineBreak,
    /// `<sup>`.
    Superscript,
    /// `<sub>`.
    Subscript,
}

/// Classify an HTML element into the action pter should take.
pub fn classify(el: &Element) -> ElementAction {
    match el.name() {
        // Skip entirely
        "script" | "style" | "head" | "meta" | "link" | "title" | "noscript" => {
            ElementAction::Skip
        }

        // Block elements
        "p" => ElementAction::Block(BlockKind::Paragraph),
        "h1" => ElementAction::Block(BlockKind::Heading(1)),
        "h2" => ElementAction::Block(BlockKind::Heading(2)),
        "h3" => ElementAction::Block(BlockKind::Heading(3)),
        "h4" => ElementAction::Block(BlockKind::Heading(4)),
        "h5" => ElementAction::Block(BlockKind::Heading(5)),
        "h6" => ElementAction::Block(BlockKind::Heading(6)),
        "blockquote" => ElementAction::Block(BlockKind::Blockquote),
        "ul" | "menu" => ElementAction::Block(BlockKind::UnorderedList),
        "ol" => ElementAction::Block(BlockKind::OrderedList),
        "li" => ElementAction::Block(BlockKind::ListItem),
        "pre" => ElementAction::Block(BlockKind::PreFormatted),
        "hr" => ElementAction::Block(BlockKind::HorizontalRule),
        "table" => ElementAction::Block(BlockKind::Table),
        // Table sub-elements are handled by the Table block handler, not individually
        "thead" | "tbody" | "tfoot" | "tr" | "td" | "th" | "caption" | "colgroup" | "col" => {
            ElementAction::Transparent
        }
        "div" | "section" | "article" | "main" | "header" | "footer" | "nav" | "aside"
        | "figure" | "figcaption" | "details" | "summary" => {
            ElementAction::Block(BlockKind::Div)
        }

        // Inline elements
        "strong" | "b" => ElementAction::Inline(InlineKind::Bold),
        "em" | "i" => ElementAction::Inline(InlineKind::Italic),
        "del" | "s" | "strike" => ElementAction::Inline(InlineKind::Strikethrough),
        "code" | "tt" => ElementAction::Inline(InlineKind::Code),
        "a" => ElementAction::Inline(InlineKind::Link),
        "img" => ElementAction::Inline(InlineKind::Image),
        "br" => ElementAction::Inline(InlineKind::LineBreak),
        "sup" => ElementAction::Inline(InlineKind::Superscript),
        "sub" => ElementAction::Inline(InlineKind::Subscript),

        // Everything else: transparent (render children)
        _ => ElementAction::Transparent,
    }
}

/// Check if an `<img>` element is a tracking pixel.
///
/// Returns `true` if the image should be skipped. An image is treated as a
/// tracking pixel when any of the following holds:
///
/// - its `width` or `height` attribute is exactly `"1"` or `"0"`;
/// - it has no `src` attribute, or the attribute is empty;
/// - its `src` starts with `data:image/gif;base64,R0lGOD` (an inline GIF,
///   commonly a transparent pixel);
/// - its inline `style` sets `width`, `height`, `max-width` or `max-height`
///   to zero or to at most `1px`, or sets `display` to `none`.
///
/// The style attribute is examined one declaration at a time (case
/// insensitively, ignoring whitespace and a trailing `!important`) and the
/// property name must match exactly. Plain substring matching would also
/// fire on look-alike declarations such as `border-width:1px`,
/// `min-width:0` or `line-height:0`, and would drop ordinary images.
pub fn is_tracking_pixel(el: &Element) -> bool {
    // True for a CSS length that is zero (in any unit) or at most one pixel.
    fn is_tiny_length(value: &str) -> bool {
        // Split "<number><unit>" at the first unit character.
        let unit_start = value
            .find(|c: char| c.is_ascii_alphabetic() || c == '%')
            .unwrap_or(value.len());
        let (number, unit) = value.split_at(unit_start);
        match number.trim().parse::<f64>() {
            Ok(n) if n == 0.0 => true,
            Ok(n) => unit == "px" && n > 0.0 && n <= 1.0,
            Err(_) => false,
        }
    }

    // True if any declaration in the inline style hides or shrinks the image.
    fn style_marks_pixel(style: &str) -> bool {
        style.to_lowercase().split(';').any(|declaration| {
            let Some((property, value)) = declaration.split_once(':') else {
                return false;
            };
            // Ignore a trailing `!important` when looking at the value.
            let value = value.split('!').next().unwrap_or("").trim();
            match property.trim() {
                "width" | "height" | "max-width" | "max-height" => is_tiny_length(value),
                "display" => value == "none",
                _ => false,
            }
        })
    }

    // 1x1 or 0x0 images: either dimension alone is enough.
    let has_tiny_attr = |name: &str| matches!(el.attr(name), Some("1" | "0"));
    if has_tiny_attr("width") || has_tiny_attr("height") {
        return true;
    }

    match el.attr("src") {
        // Missing or empty src: nothing to render.
        None | Some("") => return true,
        // data:image/gif (common transparent pixel).
        Some(src) if src.starts_with("data:image/gif;base64,R0lGOD") => return true,
        Some(_) => {}
    }

    // Finally, check the inline style for tiny dimensions or display:none.
    el.attr("style").map_or(false, style_marks_pixel)
}

// Unit tests for `classify`, `is_tracking_pixel` and `is_hidden`.
//
// Most tests feed exactly one signal to the function under test so that a
// mutation of any single match arm or boolean operand is caught by a
// dedicated test.
#[cfg(test)]
mod tests {
    use super::*;
    use scraper::{Html, Selector};

    // Parses `<tag></tag>` as an HTML fragment and classifies the first
    // element matching `tag`.
    fn classify_tag(tag: &str) -> ElementAction {
        let html = format!("<{tag}></{tag}>");
        let doc = Html::parse_fragment(&html);
        let sel = Selector::parse(tag).unwrap();
        let el = doc.select(&sel).next().unwrap();
        classify(el.value())
    }

    // Builds an `<img>` carrying the given raw attribute text (wrapped in a
    // `<div>`) and runs `is_tracking_pixel` on it.
    fn img_is_pixel(attrs: &str) -> bool {
        let html = format!("<div><img {attrs} ></div>");
        let doc = Html::parse_fragment(&html);
        let sel = Selector::parse("img").unwrap();
        let el = doc.select(&sel).next().unwrap();
        is_tracking_pixel(el.value())
    }

    // Builds a `<div>` carrying the given raw attribute text and runs
    // `is_hidden` on it.
    fn div_is_hidden(attrs: &str) -> bool {
        let html = format!("<div {attrs}></div>");
        let doc = Html::parse_fragment(&html);
        let sel = Selector::parse("div").unwrap();
        let el = doc.select(&sel).next().unwrap();
        is_hidden(el.value())
    }

    // -- classify: heading levels (h4/h5/h6 arms) --
    // Without these arms, the elements fall through to `_ => Transparent`,
    // which differs from `Block(Heading(n))`. Tests catch the deletion.

    #[test]
    fn classify_h1_is_heading_1() {
        assert!(matches!(classify_tag("h1"), ElementAction::Block(BlockKind::Heading(1))));
    }

    #[test]
    fn classify_h4_is_heading_4() {
        assert!(matches!(classify_tag("h4"), ElementAction::Block(BlockKind::Heading(4))));
    }

    #[test]
    fn classify_h5_is_heading_5() {
        assert!(matches!(classify_tag("h5"), ElementAction::Block(BlockKind::Heading(5))));
    }

    #[test]
    fn classify_h6_is_heading_6() {
        assert!(matches!(classify_tag("h6"), ElementAction::Block(BlockKind::Heading(6))));
    }

    #[test]
    fn classify_script_is_skip() {
        assert!(matches!(classify_tag("script"), ElementAction::Skip));
    }

    #[test]
    fn classify_table_is_block_table() {
        assert!(matches!(classify_tag("table"), ElementAction::Block(BlockKind::Table)));
    }

    #[test]
    fn classify_strong_is_inline_bold() {
        assert!(matches!(classify_tag("strong"), ElementAction::Inline(InlineKind::Bold)));
    }

    // -- is_tracking_pixel: each signal needs its own positive test --

    #[test]
    fn pixel_width_1_only() {
        assert!(img_is_pixel(r#"src="x" width="1" height="100""#));
    }

    #[test]
    fn pixel_height_1_only() {
        // Catches a `||` → `&&` mutation on the width/height attribute check
        // in `is_tracking_pixel` (width OR height; not AND).
        assert!(img_is_pixel(r#"src="x" width="100" height="1""#));
    }

    #[test]
    fn pixel_width_0_only() {
        assert!(img_is_pixel(r#"src="x" width="0" height="100""#));
    }

    #[test]
    fn pixel_no_src_is_pixel() {
        assert!(img_is_pixel(r#"width="100" height="100""#));
    }

    #[test]
    fn pixel_empty_src_is_pixel() {
        assert!(img_is_pixel(r#"src="" width="100" height="100""#));
    }

    #[test]
    fn pixel_transparent_gif_data_uri_is_pixel() {
        assert!(img_is_pixel(
            r#"src="data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==" width="100" height="100""#
        ));
    }

    // Each inline-style signal checked by `is_tracking_pixel` needs its own
    // input that triggers ONLY that signal, so that weakening any single one
    // (e.g. a `||` → `&&` mutant) makes exactly one test fail.

    #[test]
    fn pixel_style_width_1px() {
        assert!(img_is_pixel(r#"src="x" style="width:1px""#));
    }

    #[test]
    fn pixel_style_width_space_1px() {
        assert!(img_is_pixel(r#"src="x" style="width: 1px""#));
    }

    #[test]
    fn pixel_style_width_0() {
        assert!(img_is_pixel(r#"src="x" style="width:0""#));
    }

    #[test]
    fn pixel_style_height_1px() {
        assert!(img_is_pixel(r#"src="x" style="height:1px""#));
    }

    #[test]
    fn pixel_style_height_space_1px() {
        assert!(img_is_pixel(r#"src="x" style="height: 1px""#));
    }

    #[test]
    fn pixel_style_height_0() {
        assert!(img_is_pixel(r#"src="x" style="height:0""#));
    }

    #[test]
    fn pixel_style_display_none() {
        assert!(img_is_pixel(r#"src="x" style="display:none""#));
    }

    #[test]
    fn pixel_style_display_space_none() {
        assert!(img_is_pixel(r#"src="x" style="display: none""#));
    }

    #[test]
    fn pixel_normal_image_is_not_pixel() {
        assert!(!img_is_pixel(
            r#"src="https://example.com/cat.jpg" width="500" height="300""#
        ));
    }

    // -- is_hidden: each signal with its own targeted test --

    #[test]
    fn hidden_display_none() {
        assert!(div_is_hidden(r#"style="display:none""#));
    }

    #[test]
    fn hidden_display_space_none() {
        assert!(div_is_hidden(r#"style="display: none""#));
    }

    #[test]
    fn hidden_visibility_hidden() {
        assert!(div_is_hidden(r#"style="visibility:hidden""#));
    }

    #[test]
    fn hidden_visibility_space_hidden() {
        assert!(div_is_hidden(r#"style="visibility: hidden""#));
    }

    #[test]
    fn hidden_font_size_0() {
        assert!(div_is_hidden(r#"style="font-size:0""#));
    }

    #[test]
    fn hidden_font_size_space_0() {
        assert!(div_is_hidden(r#"style="font-size: 0""#));
    }

    #[test]
    fn hidden_line_height_0() {
        assert!(div_is_hidden(r#"style="line-height:0""#));
    }

    #[test]
    fn hidden_line_height_space_0() {
        assert!(div_is_hidden(r#"style="line-height: 0""#));
    }

    // A zero height only counts as hidden when combined with overflow:hidden,
    // so both halves must be present for `is_hidden` to fire. Tests cover the
    // spaced and unspaced forms, plus the negative case where a zero height
    // alone is NOT hidden (catches a `&&` → `||` mutation of that pairing).

    #[test]
    fn hidden_height_0_with_overflow_no_spaces() {
        assert!(div_is_hidden(r#"style="height:0;overflow:hidden""#));
    }

    #[test]
    fn hidden_height_0_with_overflow_with_spaces() {
        assert!(div_is_hidden(r#"style="height: 0;overflow: hidden""#));
    }

    #[test]
    fn hidden_height_0_alone_is_not_hidden() {
        // Catches a `&&` → `||` mutation of the height/overflow pairing:
        // with `||`, this would erroneously be hidden.
        assert!(!div_is_hidden(r#"style="height:0""#));
    }

    #[test]
    fn hidden_height_space_0_alone_is_not_hidden() {
        // Same boundary check for the space variant — catches the && → || mutation
        // on the `(height: 0 && overflow: hidden)` arm specifically.
        assert!(!div_is_hidden(r#"style="height: 0""#));
    }

    #[test]
    fn hidden_max_height_0() {
        assert!(div_is_hidden(r#"style="max-height:0""#));
    }

    #[test]
    fn hidden_max_height_space_0() {
        assert!(div_is_hidden(r#"style="max-height: 0""#));
    }

    #[test]
    fn hidden_no_signal_in_style() {
        assert!(!div_is_hidden(r#"style="color:red;font-weight:bold""#));
    }

    #[test]
    fn hidden_no_style_attr_is_not_hidden() {
        assert!(!div_is_hidden(""));
    }
}

/// Check if an element is hidden via inline style.
///
/// Returns `true` when the element's `style` attribute contains any of:
///
/// - `display: none` or `visibility: hidden`;
/// - a zero `font-size`, `line-height` or `max-height` (spacer tricks
///   commonly used in email templates);
/// - a zero `height` together with `overflow: hidden`.
///
/// Elements without a `style` attribute are never considered hidden.
///
/// The style is examined one declaration at a time (case insensitively,
/// ignoring whitespace and a trailing `!important`); property names must
/// match exactly and "zero" is decided numerically. Plain substring matching
/// would treat visible content such as `font-size:0.9em` or
/// `line-height:0.8` as hidden, and would mistake `min-height:0` for
/// `height:0`.
pub fn is_hidden(el: &Element) -> bool {
    // True for a CSS numeric value equal to zero, with or without a unit
    // (`0`, `0px`, `0.0em`, `0%`); false for anything else, including `0.9em`.
    fn is_zero(value: &str) -> bool {
        let unit_start = value
            .find(|c: char| c.is_ascii_alphabetic() || c == '%')
            .unwrap_or(value.len());
        let (number, _unit) = value.split_at(unit_start);
        number.trim().parse::<f64>().map_or(false, |n| n == 0.0)
    }

    let Some(style) = el.attr("style") else {
        return false;
    };
    let style = style.to_lowercase();

    // `height: 0` only hides content when overflow is clipped as well, and the
    // two declarations may appear in either order, so remember both.
    let mut zero_height = false;
    let mut overflow_hidden = false;

    for declaration in style.split(';') {
        let Some((property, value)) = declaration.split_once(':') else {
            continue;
        };
        // Ignore a trailing `!important` when looking at the value.
        let value = value.split('!').next().unwrap_or("").trim();
        match property.trim() {
            "display" if value == "none" => return true,
            "visibility" if value == "hidden" => return true,
            "font-size" | "line-height" | "max-height" if is_zero(value) => return true,
            "height" if is_zero(value) => zero_height = true,
            "overflow" if value == "hidden" => overflow_hidden = true,
            _ => {}
        }
    }

    zero_height && overflow_hidden
}