// pter 0.1.0
//
// Plain Text Email Renderer — convert HTML email bodies into readable Markdown.
// Documentation
use proptest::prelude::*;

/// Strategy producing small HTML-like strings.
///
/// A generated value is the concatenation of 1 to 9 pieces, where each piece
/// is one of:
///
/// * plain text,
/// * a single element wrapping text (`<tag>text</tag>`),
/// * a self-closing element (`<tag/>`),
/// * two nested elements wrapping text (`<outer><inner>text</inner></outer>`).
///
/// Text is up to 100 characters drawn from ASCII letters, digits, space and
/// `.,!?`, so it never contains `<`, `>` or `&`. Tag names come from a fixed
/// list; attributes are never generated.
fn html_fragment() -> impl Strategy<Value = String> {
    // A regex string literal is itself a proptest strategy yielding `String`.
    let body_text = "[a-zA-Z0-9 .,!?]{0,100}";

    let tag_name = prop::sample::select(vec![
        "p", "div", "span", "strong", "em", "a", "h1", "h2", "h3",
        "ul", "ol", "li", "blockquote", "pre", "code", "br", "hr",
        "img", "table", "tr", "td", "th", "b", "i", "del", "sup", "sub",
    ]);

    // <tag>text</tag>
    let wrapped = (tag_name.clone(), body_text)
        .prop_map(|(tag, content)| format!("<{tag}>{content}</{tag}>"));

    // <tag/>
    let self_closing = tag_name.clone().prop_map(|tag| format!("<{tag}/>"));

    // <outer><inner>text</inner></outer>
    let nested = (tag_name.clone(), tag_name, body_text).prop_map(|(outer, inner, content)| {
        format!("<{outer}><{inner}>{content}</{inner}></{outer}>")
    });

    let piece = prop_oneof![body_text, wrapped, self_closing, nested];

    prop::collection::vec(piece, 1..10).prop_map(|pieces| pieces.concat())
}

proptest! {
    // Smoke test: `pter::convert` must return normally for every string that
    // `html_fragment()` generates. The converted text itself is not inspected.
    #[test]
    fn never_panics(html in html_fragment()) {
        drop(pter::convert(&html));
    }

    // Smoke test on unstructured input. Despite the name, the input is not raw
    // bytes: it is a valid UTF-8 string of up to 500 characters matching `\PC`,
    // i.e. any character outside the Unicode general category "Other".
    // Only the absence of a panic is checked.
    #[test]
    fn never_panics_on_arbitrary_bytes(s in "\\PC{0,500}") {
        drop(pter::convert(&s));
    }

    // Checks that <script>, <style> and <head> markup never shows up in the
    // converted output (outside fenced code blocks).
    //
    // `html_fragment()` draws tag names from a fixed list that contains none of
    // those three, and its text alphabet has no `<`, so on its own it can never
    // produce the markup this test looks for. The elements are therefore
    // prepended explicitly; without them the assertions below could not fail.
    #[test]
    fn output_contains_no_html_tags(html in html_fragment()) {
        let doc = format!(
            "<head><style>p {{ color: red; }}</style></head>\
             <script>alert(1);</script>{html}"
        );
        let md = pter::convert(&doc);

        // Splitting on the fence marker leaves the text outside fenced code
        // blocks at the even positions (0, 2, 4, ...); only that text is checked.
        let outside_fences: String = md.split("```").step_by(2).collect();

        for needle in ["<script", "<style", "<head"] {
            prop_assert!(
                !outside_fences.contains(needle),
                "leaked {needle}> in: {md}"
            );
        }
    }

    // The `String` type already guarantees well-formed UTF-8, so what this
    // test actually checks is that U+FFFD (REPLACEMENT CHARACTER) never appears
    // in the output. `html_fragment()` generates ASCII only, so a replacement
    // character could only have been introduced during conversion.
    #[test]
    fn output_is_valid_utf8(html in html_fragment()) {
        let md = pter::convert(&html);
        // `prop_assert!` reports the failure to proptest as an error instead of
        // panicking, which keeps shrinking output readable.
        prop_assert!(!md.contains('\u{FFFD}'), "replacement char in: {md}");
    }

    // The output must never contain three `\n` characters in a row, i.e. more
    // than one consecutive blank line.
    #[test]
    fn no_excessive_blank_lines(html in html_fragment()) {
        let md = pter::convert(&html);
        // `prop_assert!` reports the failure to proptest as an error instead of
        // panicking, which keeps shrinking output readable.
        prop_assert!(!md.contains("\n\n\n"), "triple newline in output: {md}");
    }

    // No line of the output may end in whitespace.
    #[test]
    fn no_trailing_whitespace_on_lines(html in html_fragment()) {
        let md = pter::convert(&html);
        for (i, line) in md.lines().enumerate() {
            // The line is printed with `{:?}` so that the offending trailing
            // spaces or tabs are visible (escaped and quoted) in the message.
            prop_assert!(
                line == line.trim_end(),
                "trailing whitespace on line {i}: {line:?}"
            );
        }
    }

    // A document whose body holds nothing but whitespace must convert to
    // output that is empty once trimmed.
    //
    // `s` is generated from `\s{0,20}` and is therefore always whitespace-only,
    // so no escape hatch for "non-whitespace input" is needed in the assertion.
    #[test]
    fn empty_input_returns_empty(s in "\\s{0,20}") {
        let html = format!("<html><body>{s}</body></html>");
        let md = pter::convert(&html);
        prop_assert!(
            md.trim().is_empty(),
            "expected empty output for whitespace-only body {s:?}, got: {md:?}"
        );
    }
}