#![allow(missing_docs, reason = "allow in tests")]
#![allow(clippy::unwrap_used, reason = "allow in tests")]
use regex::Regex;
use skymark::{CodeBlockStyle, HtmlToMarkdown, Options};
/// Checks that `keep_data_images` decides whether an `<img>` with a `data:` URI is emitted.
///
/// The same two-image document (one regular image, one inline `data:` image) is translated
/// with the option switched on and then off. The regular image is expected in both outputs;
/// the `data:` image only while the option is on.
#[test]
fn keep_data_images_option() {
    let html = "<img alt=\"normal\" src=\"normal_img.jpg\">\n <img src=\"data:image/gif;base64,R0lGODlhEA\"/>";
    let mut converter = HtmlToMarkdown::new();

    // Option on: both images appear as Markdown image syntax, separated by one space.
    converter.options_mut().keep_data_images = true;
    assert_eq!(
        converter.translate(html),
        "![normal](normal_img.jpg) ![](data:image/gif;base64,R0lGODlhEA)"
    );

    // Option off: the `data:` image is dropped, the regular image remains.
    converter.options_mut().keep_data_images = false;
    assert_eq!(converter.translate(html), "![normal](normal_img.jpg)");
}
/// Translates one link-heavy snippet with `use_link_reference_definitions` off, then on.
///
/// The snippet contains a link with nested markup, an anchor without `href`, a second
/// target, a repeated target and a link whose text equals its URL.
#[test]
fn use_link_reference_definitions_option() {
    let url = "http://www.github.com/crosstype";
    let html = format!(
        "Hello: \n <a href=\"{url}\">a<br><br>b<strong>c</strong></a>\n <a>a<strong>b</strong></a>\n <a href=\"{url}/other\">link2</a>\n <a href=\"{url}\">repeat link</a>\n <a href=\"{url}\">{url}</a> Goodbye!\n "
    );
    // Expected output when every link carries its target inline.
    let inline_expected = format!(
        "Hello: [a b**c**]({url}) a**b** [link2]({url}/other) [repeat link]({url}) <{url}> Goodbye!"
    );
    // Expected output when targets are collected into numbered definitions at the end;
    // the repeated target reuses index 1.
    let reference_expected = format!(
        "Hello: [a b**c**][1] a**b** [link2][2] [repeat link][1] <{url}> Goodbye! \n\n[1]: {url}\n[2]: {url}/other"
    );

    let mut converter = HtmlToMarkdown::new();

    converter.options_mut().use_link_reference_definitions = false;
    assert_eq!(converter.translate(&html), inline_expected);

    converter.options_mut().use_link_reference_definitions = true;
    assert_eq!(converter.translate(&html), reference_expected);
}
/// Translates the same snippet with `use_inline_links` off, then on.
///
/// Only the link whose text equals its URL is expected to change form between the two runs:
/// `[url](url)` with the option off, `<url>` with it on.
#[test]
fn use_inline_links_option() {
    let url = "http://www.github.com/crosstype";
    let html = format!(
        "Hello: \n <a href=\"{url}\">{url}</a>\n <a>a<strong>b</strong></a>\n <a href=\"{url}/other\">link2</a>\n <a href=\"{url}\">repeat link</a> Goodbye!\n "
    );
    let bracketed_expected =
        format!("Hello: [{url}]({url}) a**b** [link2]({url}/other) [repeat link]({url}) Goodbye!");
    let autolink_expected =
        format!("Hello: <{url}> a**b** [link2]({url}/other) [repeat link]({url}) Goodbye!");

    let mut converter = HtmlToMarkdown::new();

    converter.options_mut().use_inline_links = false;
    assert_eq!(converter.translate(&html), bracketed_expected);

    converter.options_mut().use_inline_links = true;
    assert_eq!(converter.translate(&html), autolink_expected);
}
/// Checks the fence emitted around a `<pre><code>` block before and after changing
/// `code_fence`.
///
/// The code text contains list markers and a backslash; both expectations embed it
/// verbatim, so the assertions also require that it is not escaped.
#[test]
fn code_fence_option() {
    let text = "* test \n\n1. test\n\\Test";
    let html = format!("<pre><code class=\"language-fortran\">{text}</code></pre>");
    let backtick_expected = format!("```fortran\n{text}\n```");
    let plus_expected = format!("+++++fortran\n{text}\n+++++");

    let mut converter = HtmlToMarkdown::new();

    // Before any option is changed the fence is three backticks.
    assert_eq!(converter.translate(&html), backtick_expected);

    converter.options_mut().code_fence = "+++++".to_owned();
    assert_eq!(converter.translate(&html), plus_expected);
}
/// Checks both `CodeBlockStyle` variants against the same two-line `<pre><code>` block.
///
/// `Fenced` is expected to wrap the code in backtick fences; `Indented` is expected to
/// prefix every code line with four spaces, the minimum indentation CommonMark requires
/// for an indented code block.
#[test]
fn code_block_style_option() {
    let html = "<pre><code>line1\nline2</code></pre>";
    let mut converter = HtmlToMarkdown::new();

    converter.options_mut().code_block_style = CodeBlockStyle::Fenced;
    assert_eq!(converter.translate(html), "```\nline1\nline2\n```");

    // Four leading spaces per line: fewer would not form an indented code block.
    converter.options_mut().code_block_style = CodeBlockStyle::Indented;
    assert_eq!(converter.translate(html), "    line1\n    line2");
}
/// Checks the delimiter wrapped around `<em>` content, first with untouched options and
/// then for each custom `em_delimiter` value.
#[test]
fn em_delimiter_option() {
    let html = "<em>some text</em><em>more text</em>";
    let mut converter = HtmlToMarkdown::new();

    // Before any option is changed the delimiter is an underscore.
    assert_eq!(converter.translate(html), "_some text_ _more text_");

    // Each pair is (delimiter to install, expected translation).
    for (delimiter, expected) in [
        ("|", "|some text| |more text|"),
        ("+++", "+++some text+++ +++more text+++"),
    ] {
        converter.options_mut().em_delimiter = delimiter.to_owned();
        assert_eq!(converter.translate(html), expected);
    }
}
/// Checks the delimiter wrapped around `<strong>` content, first with untouched options and
/// then for each custom `strong_delimiter` value.
#[test]
fn strong_delimiter_option() {
    let html = "<strong>some text</strong><strong>more text</strong>";
    let mut converter = HtmlToMarkdown::new();

    // Before any option is changed the delimiter is a double asterisk.
    assert_eq!(converter.translate(html), "**some text** **more text**");

    // Each pair is (delimiter to install, expected translation).
    for (delimiter, expected) in [
        ("|", "|some text| |more text|"),
        ("+++", "+++some text+++ +++more text+++"),
    ] {
        converter.options_mut().strong_delimiter = delimiter.to_owned();
        assert_eq!(converter.translate(html), expected);
    }
}
/// Checks the delimiter used for `<strike>`, `<s>` and `<del>`, first with untouched
/// options and then for each custom `strike_delimiter` value.
#[test]
fn strike_delimiter_option() {
    let html = "<strike>some text</strike><s>more text</s><del>one more text</del>";
    let mut converter = HtmlToMarkdown::new();

    // Before any option is changed all three tags are wrapped in a double tilde.
    assert_eq!(
        converter.translate(html),
        "~~some text~~ ~~more text~~ ~~one more text~~"
    );

    // Each pair is (delimiter to install, expected translation).
    for (delimiter, expected) in [
        ("~", "~some text~ ~more text~ ~one more text~"),
        ("+++", "+++some text+++ +++more text+++ +++one more text+++"),
    ] {
        converter.options_mut().strike_delimiter = delimiter.to_owned();
        assert_eq!(converter.translate(html), expected);
    }
}
/// Installs a single `text_replace` rule (`abc` -> `xyz`) and checks that it is applied to
/// heading text.
#[test]
#[allow(clippy::trivial_regex, reason = "required for test")]
fn text_replace_option() {
    // A rule is a (pattern, replacement) pair.
    let rule = (Regex::new("abc").unwrap(), "xyz".to_owned());

    let mut converter = HtmlToMarkdown::new();
    converter.options_mut().text_replace = vec![rule];

    assert_eq!(converter.translate("<h1>hello abc</h1>"), "# hello xyz");
}
/// Checks line-start escaping before and after replacing `line_start_escape`.
///
/// With untouched options, a leading `+` or `>` after a `<br>` is expected to be
/// backslash-escaped. The replacement pattern's character class (`[=>-]`) does not contain
/// `+`, so afterwards a leading `+` is expected to pass through unescaped.
// NOTE(review): the expected strings end each broken line with a single space before `\n`;
// CommonMark hard line breaks use two. Confirm these literals match the converter's output.
#[test]
fn line_start_escape_option() {
    let plus_html = "<p>text<br>+ text<br>+ more text</p>";
    let quote_html = "<p>text<br>> text<br>> more text</p>";
    let mut converter = HtmlToMarkdown::new();

    assert_eq!(
        converter.translate(plus_html),
        "text \n\\+ text \n\\+ more text"
    );
    assert_eq!(
        converter.translate(quote_html),
        "text \n\\> text \n\\> more text"
    );

    // The option is a (pattern, replacement) pair; the replacement re-emits the captured
    // groups with a backslash inserted between them.
    let pattern = Regex::new(r"(?m)^(\s*?)((?:[=>-])|(?:#{1,6}\s))|(?:(\d+)(\.\s))").unwrap();
    let replacement = "$1$3\\$2$4".to_owned();
    converter.options_mut().line_start_escape = (pattern, replacement);

    assert_eq!(
        converter.translate(plus_html),
        "text \n+ text \n+ more text"
    );
}
/// Checks global escaping before and after replacing `global_escape`.
///
/// With untouched options, literal asterisks inside text are expected to be
/// backslash-escaped. The replacement pattern matches only `_`, `~`, `[` and `]`, so
/// afterwards asterisks are expected to stay as-is while square brackets are escaped.
#[test]
fn global_escape_option() {
    let mut converter = HtmlToMarkdown::new();

    assert_eq!(
        converter.translate("<strong>text**text</strong>"),
        "**text\\*\\*text**"
    );

    // The option is a (pattern, replacement) pair; `$0` re-emits the whole match after a
    // backslash.
    let pattern = Regex::new(r"[_~\[\]]").unwrap();
    let replacement = r"\$0".to_owned();
    converter.options_mut().global_escape = (pattern, replacement);

    assert_eq!(converter.translate("<i>text**text</i>"), "_text**text_");
    assert_eq!(
        converter.translate("<h1>title [more words]</h1>"),
        "# title \\[more words\\]"
    );
}
/// Checks the marker placed before unordered list items, first with untouched options and
/// then for each custom `bullet_marker` value.
#[test]
fn bullet_marker_option() {
    let html = "<ul><li>item1</li><li>item2</li></ul>";
    let mut converter = HtmlToMarkdown::new();

    // Before any option is changed the marker is an asterisk.
    assert_eq!(converter.translate(html), "* item1\n* item2");

    // Each pair is (marker to install, expected translation).
    for (marker, expected) in [
        ("-", "- item1\n- item2"),
        ("<->", "<-> item1\n<-> item2"),
    ] {
        converter.options_mut().bullet_marker = marker.to_owned();
        assert_eq!(converter.translate(html), expected);
    }
}
/// Checks that tags listed in `ignore` are left out of the translation.
///
/// A fresh converter is built for each tag list and run over the same
/// `<strong>` + `<em>` snippet.
#[test]
fn ignore_option() {
    let html = "<strong>some text</strong><em>more text</em>";

    // Each pair is (tag names to ignore, expected translation).
    let cases: [(&[&str], &str); 3] = [
        (&["STRONG"], "_more text_"),
        (&["EM"], "**some text**"),
        (&["EM", "STRONG"], ""),
    ];

    for (tags, expected) in cases {
        let options = Options {
            ignore: tags.iter().map(|&tag| tag.to_owned()).collect(),
            ..Options::default()
        };
        assert_eq!(
            HtmlToMarkdown::with_options(options).translate(html),
            expected
        );
    }
}
/// Checks that an ignored `<nav>` between two paragraphs is dropped and the paragraphs
/// stay separated by one blank line. The tag name is given in lower case here.
#[test]
fn ignore_block_elements_option() {
    let html = "<p>Before</p><nav>Navigation content</nav><p>After</p>";
    let expected = "Before\n\nAfter";

    let skip_nav = Options {
        ignore: vec!["nav".to_owned()],
        ..Options::default()
    };

    assert_eq!(
        HtmlToMarkdown::with_options(skip_nav).translate(html),
        expected
    );
}
/// Checks that tags listed in `block_elements` are separated from their neighbours by
/// blank lines.
///
/// A fresh converter is built for each tag and run over the same inline snippet.
#[test]
fn block_elements_option() {
    let html = "<em>x</em><strong>yyy</strong><em>x</em><span>text</span>";

    // Each pair is (tag promoted to block level, expected translation).
    for (tag, expected) in [
        ("STRONG", "_x_\n\n**yyy**\n\n_x_text"),
        ("EM", "_x_\n\n**yyy**\n\n_x_\n\ntext"),
    ] {
        let options = Options {
            block_elements: vec![tag.to_owned()],
            ..Options::default()
        };
        assert_eq!(
            HtmlToMarkdown::with_options(options).translate(html),
            expected
        );
    }
}
/// Checks that a run of ten `<br/>` tags is capped at `max_consecutive_newlines` line
/// breaks: three with untouched options, five once the option is set to 5.
// NOTE(review): each expected line break is a single space followed by `\n`; CommonMark hard
// line breaks use two spaces. Confirm this literal matches the converter's output.
#[test]
fn max_consecutive_newlines_option() {
    let breaks = "<br/>".repeat(10);
    let html = format!("<b>text</b>{breaks}<em>something</em>");
    // Builds the expected translation containing `count` line breaks.
    let expected_with = |count: usize| format!("**text**{}_something_", " \n".repeat(count));

    let mut converter = HtmlToMarkdown::new();
    assert_eq!(converter.translate(&html), expected_with(3));

    converter.options_mut().max_consecutive_newlines = 5;
    assert_eq!(converter.translate(&html), expected_with(5));
}