skymark 0.1.1

HTML-to-Markdown converter prioritizing proper conversion for human readability
Documentation
#![allow(missing_docs, reason = "allow in tests")]

mod common;

use skymark::{HtmlToMarkdown, TranslatorConfig};

/// A leading `<!DOCTYPE html>` declaration must not appear in the translated output.
#[test]
fn removes_uncaught_doctype() {
    let markdown = common::translate("<!DOCTYPE html>abc");
    assert_eq!(markdown, "abc");
}

/// Runs of spaces, tabs, `\n` and `\r\n` between and inside inline elements
/// must each come out as a single space.
#[test]
fn handles_whitespace_with_single_space() {
    let expected = "test test2 test3 test4 test5 test6";
    // Mixes double spaces, trailing spaces, LF, CRLF and tabs on purpose.
    let input = "<span>test</span>  <span>test2 </span>\n<span>test3</span>\r\n\r\n\t\t\t<span>test4</span>\t<span>test5\r\n\n\n\t\ttest6</span>";

    let actual = common::translate(input);

    assert_eq!(actual, expected);
}

/// A `<strong>` nested inside another `<strong>` must yield a single pair of
/// `**` delimiters around the combined text.
#[test]
fn removes_nested_text_formatting_tags_strong() {
    let markdown = common::translate("<strong>My <strong>bold</strong> text</strong>");
    assert_eq!(markdown, "**My bold text**");
}

/// Spaces and `&nbsp;` entities padding the inside of a `<strong>` tag: the
/// expected output puts the `**` delimiters directly around `Label:` with a
/// single space on either side.
#[test]
fn handles_whitespace_for_strong_tag() {
    let input = "<p><strong> &nbsp;Label:&nbsp; </strong>Value</p>";
    let expected = " **Label:** Value";
    assert_eq!(common::translate(input), expected);
}

/// A custom `iframe` translator with fixed content `[iframe]` only contributes
/// to the output for a childless `<iframe/>` when `preserve_if_empty` is set;
/// otherwise the element produces nothing.
#[test]
fn childless_nodes_visited_if_preserve_if_empty_set() {
    // Builds a converter with default options whose only custom translator is
    // the given `iframe` configuration.
    let converter_for = |iframe_config: TranslatorConfig| {
        HtmlToMarkdown::with_options_and_translators(
            skymark::Options::default(),
            vec![("iframe".to_owned(), iframe_config)],
            Vec::<(String, TranslatorConfig)>::new(),
        )
    };

    // Leaves `preserve_if_empty` at whatever `TranslatorConfig::default()` provides.
    let without_preserve = converter_for(TranslatorConfig {
        content: Some("[iframe]".to_owned()),
        ..TranslatorConfig::default()
    });

    let with_preserve = converter_for(TranslatorConfig {
        content: Some("[iframe]".to_owned()),
        preserve_if_empty: true,
        ..TranslatorConfig::default()
    });

    let html = "<span>Hello</span><iframe src=\"https://radio4000.com\"/><span>World</span>";

    assert_eq!(without_preserve.translate(html), "HelloWorld");
    assert_eq!(with_preserve.translate(html), "Hello[iframe]World");
}

/// Tag names written in mixed or upper case (`<Br>`, `<DIV>`, `<Strong>`, ...)
/// must translate to the Markdown listed for each case below.
#[test]
fn handles_mixed_case_tags() {
    // (input HTML, expected Markdown), checked in order.
    let cases = [
        ("Foo<Br>Bar", "Foo  \nBar"),
        ("Hello<BR>World", "Hello  \nWorld"),
        ("<DIV>content</DIV>", "content"),
        (
            "<Strong>Bold</Strong> and <Em>Italic</Em>",
            "**Bold** and _Italic_",
        ),
        ("Before<Hr>After", "Before\n\n---\n\nAfter"),
        (
            "<Ul><Li>Item 1</Li><Li>Item 2</Li></Ul>",
            "* Item 1\n* Item 2",
        ),
        (
            "<H1>Title</H1><H2>Subtitle</H2>",
            "# Title\n\n## Subtitle",
        ),
    ];

    for (input, expected) in cases {
        assert_eq!(common::translate(input), expected);
    }
}

/// A `<pre><code>` block wrapping nested `<span>` markup must become a fenced
/// code block that keeps line breaks and indentation and decodes `&gt;` to `>`.
#[test]
fn code_blocks_preserve_whitespace_and_decode_entities() {
    let fenced = "```\n// > Get URL Path\nfunction getURL(s: string): string {\n    return `https://myurl.com/${s}`;\n}\n```";
    // NOTE(review): the fixture closes its tags as `</pre></code>`, the reverse
    // of the opening order. Presumably intentional; confirm before "fixing" it.
    let input = "<pre><code><span><span class=\"comment\">// &gt; Get URL Path</span></span>\n<span><span class=\"declaration\">function getURL(s: string): string {\n</span></span><span>    <span class=\"return\">return</span> `https://myurl.com/${s}`;</span>\n<span>}</span></pre></code>";

    let actual = common::translate(input);

    assert_eq!(actual, fenced);
}