// pter 0.1.0
//
// Plain Text Email Renderer — convert HTML email bodies into readable markdown.
// Documentation
use pter::convert;

/// Converting the empty string is expected to yield the empty string.
#[test]
fn empty_string() {
    let rendered = convert("");
    assert_eq!(rendered, "");
}

/// Input made up solely of spaces, a newline and a tab is expected to
/// produce empty output.
#[test]
fn whitespace_only() {
    let blank_input = "   \n\t  ";
    let rendered = convert(blank_input);
    assert_eq!(rendered, "");
}

/// Nested elements that carry no text at all are expected to produce
/// empty output.
#[test]
fn just_tags_no_content() {
    let empty_markup = "<div><p><span></span></p></div>";
    let rendered = convert(empty_markup);
    assert_eq!(rendered, "");
}

/// Text wrapped in 100 levels of `<div>` must still appear in the output.
#[test]
fn deeply_nested_divs() {
    const DEPTH: usize = 100;

    // Build `<div>` * DEPTH + text + `</div>` * DEPTH in one shot.
    let html = format!(
        "{}deep content{}",
        "<div>".repeat(DEPTH),
        "</div>".repeat(DEPTH),
    );

    let rendered = convert(&html);
    assert!(rendered.contains("deep content"));
}

/// Text wrapped in 20 levels of `<blockquote>` must survive conversion and
/// be rendered with a run of nested `>` quote markers.
#[test]
fn deeply_nested_blockquotes() {
    const DEPTH: usize = 20;

    let html = format!(
        "{}very deep{}",
        "<blockquote>".repeat(DEPTH),
        "</blockquote>".repeat(DEPTH),
    );

    let rendered = convert(&html);
    assert!(rendered.contains("very deep"));
    // Only a run of five markers is checked, not the full nesting depth.
    assert!(rendered.contains("> > > > >"));
}

/// A list item nested inside 10 levels of `<ul><li>` must keep its text.
#[test]
fn deeply_nested_lists() {
    const DEPTH: usize = 10;

    let html = format!(
        "{}deep item{}",
        "<ul><li>".repeat(DEPTH),
        "</li></ul>".repeat(DEPTH),
    );

    let rendered = convert(&html);
    assert!(rendered.contains("deep item"));
}

/// Paragraphs that are never closed must still contribute their text.
#[test]
fn malformed_unclosed_tags() {
    // html5ever auto-corrects these
    let unclosed = "<p>unclosed paragraph<p>another one";
    let rendered = convert(unclosed);

    assert!(rendered.contains("unclosed paragraph"));
    assert!(rendered.contains("another one"));
}

/// Closing tags in the wrong order (`</b>` before `</i>`) must not lose the
/// enclosed text.
#[test]
fn malformed_mismatched_tags() {
    let crossed_markup = "<b><i>crossed</b></i>";
    let rendered = convert(crossed_markup);
    assert!(rendered.contains("crossed"));
}

/// A document consisting of a single `<script>` element is expected to
/// render as empty output.
#[test]
fn only_script_content() {
    let script_only = "<script>alert('xss')</script>";
    let rendered = convert(script_only);
    assert_eq!(rendered, "");
}

/// A document consisting of a single `<style>` element is expected to
/// render as empty output.
#[test]
fn only_style_content() {
    let style_only = "<style>.x { color: red; }</style>";
    let rendered = convert(style_only);
    assert_eq!(rendered, "");
}

/// Input holding nothing but 1x1 images is expected to render as empty
/// output.
#[test]
fn only_tracking_pixels() {
    let pixels = r#"
        <img src="a.gif" width="1" height="1">
        <img src="b.gif" width="1" height="1">
    "#;

    let rendered = convert(pixels);
    assert_eq!(rendered, "");
}

/// Non-ASCII text (CJK, emoji, accented Latin) must pass through intact.
#[test]
fn unicode_content() {
    let source = "<p>日本語テスト 🎉 émojis café</p>";
    let rendered = convert(source);

    assert!(rendered.contains("日本語テスト"));
    assert!(rendered.contains("🎉"));
    assert!(rendered.contains("café"));
}

/// Numeric character references must be decoded into the characters they
/// name.
///
/// The input contains `&#169;` (U+00A9 COPYRIGHT SIGN), `&#8212;`
/// (U+2014 EM DASH) and `&#x2019;` (U+2019 RIGHT SINGLE QUOTATION MARK).
#[test]
fn html_entities_numeric() {
    let md = convert("<p>&#169; &#8212; &#x2019;</p>");
    assert!(md.contains("©"));
    // The previous check used an empty pattern, which matches every string
    // and so verified nothing. The em dash is written as an escape so the
    // literal cannot be silently lost to an encoding mishap.
    assert!(md.contains("\u{2014}"));
}

/// A document of 1000 repeated paragraphs must convert and yield output
/// that is shorter than the HTML it came from.
#[test]
fn large_input_doesnt_blow_up() {
    const COPIES: usize = 1000;

    let html = "<p>Hello world. This is a test paragraph with some content.</p>".repeat(COPIES);
    let rendered = convert(&html);

    assert!(rendered.contains("Hello world"));
    // Should be proportional, not quadratic
    let (output_len, input_len) = (rendered.len(), html.len());
    assert!(output_len < input_len);
}

/// A `<strong>` inside an `<a>` is expected to render as bold text inside
/// a markdown link.
#[test]
fn link_with_nested_formatting() {
    let rendered = convert(r#"<a href="https://example.com"><strong>bold link</strong></a>"#);
    let expected_link = "[**bold link**](https://example.com)";
    assert!(rendered.contains(expected_link));
}

/// An `<img>` without an `alt` attribute is expected to render as a
/// markdown image with empty alt text.
#[test]
fn image_with_no_alt() {
    let img_tag = r#"<img src="photo.jpg">"#;
    let rendered = convert(img_tag);
    assert!(rendered.contains("![](photo.jpg)"));
}

/// Adjacent inline elements with no whitespace between them are expected
/// to be rendered back-to-back, with no separator inserted.
#[test]
fn consecutive_inline_elements() {
    let inline_run = "<b>bold</b><i>italic</i><code>code</code>";
    let rendered = convert(inline_run);
    assert_eq!(rendered, "**bold***italic*`code`");
}

/// A table whose body row has an empty first cell must still render both
/// the header row and the body row in pipe-table form.
#[test]
fn table_with_empty_cells() {
    // Header row followed by one body row; the first body cell is empty.
    let table = concat!(
        "<table><tr><th>A</th><th>B</th></tr>",
        "<tr><td></td><td>val</td></tr></table>",
    );

    let rendered = convert(table);
    assert!(rendered.contains("| A | B |"));
    assert!(rendered.contains("|  | val |"));
}

/// Escaped markup inside `<pre>` is expected to appear as literal,
/// unescaped text within a fenced code block.
#[test]
fn pre_with_html_inside() {
    let rendered = convert("<pre>&lt;div&gt;not a tag&lt;/div&gt;</pre>");

    // A code fence must be present...
    assert!(rendered.contains("```"));
    // ...and the entities must be decoded into the literal tag text.
    assert!(rendered.contains("<div>not a tag</div>"));
}

/// Runs of spaces between words are expected to collapse to a single
/// space.
#[test]
fn multiple_spaces_in_source() {
    let spaced = "<p>word1     word2     word3</p>";
    let rendered = convert(spaced);
    assert_eq!(rendered, "word1 word2 word3");
}

/// Consecutive newlines inside a paragraph are expected to collapse to a
/// single space.
#[test]
fn newlines_in_source_collapsed() {
    let multiline = "<p>line1\n\n\nline2</p>";
    let rendered = convert(multiline);
    assert_eq!(rendered, "line1 line2");
}

/// A complete document (doctype, `<head>` with meta/title/style, `<body>`)
/// is expected to render as just the body paragraph's text.
#[test]
fn full_html_document() {
    let document = r#"
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Test Email</title>
        <style>body { font-family: sans-serif; }</style>
    </head>
    <body>
        <p>Hello!</p>
    </body>
    </html>
    "#;

    let rendered = convert(document);
    assert_eq!(rendered, "Hello!");
}

/// An image with a `data:` URI source, alt text and `width="100"` must be
/// rendered rather than dropped.
#[test]
fn data_uri_image_not_tracking_pixel() {
    // A data URI image that's not 1x1 should render
    let rendered =
        convert(r#"<img src="data:image/png;base64,iVBOR..." alt="inline" width="100">"#);

    // Only the alt-text part of the image syntax is checked.
    assert!(rendered.contains("![inline]"));
}

/// Two paragraphs inside one `<blockquote>` must each be rendered with a
/// `> ` quote prefix.
#[test]
fn blockquote_with_paragraphs() {
    let quoted = "<blockquote><p>First para</p><p>Second para</p></blockquote>";
    let rendered = convert(quoted);

    assert!(rendered.contains("> First para"));
    // NOTE(review): this check is already implied by the one above; it was
    // presumably meant to verify a quoted separator line between the two
    // paragraphs — confirm the intended pattern.
    assert!(rendered.contains("> "));
    assert!(rendered.contains("> Second para"));
}