use crate::{
parser::ParseHTMLError,
structs::{AttributeValues, Node, NodeType::*, ToMdConfig},
};
pub fn to_md(node: Node) -> String {
to_md_with_config(node, &ToMdConfig::default())
}
pub fn to_md_with_config(node: Node, config: &ToMdConfig) -> String {
let mut res = String::new();
let mut tail = String::new();
let mut follow_child = true;
if let Some(tag_type) = &node.tag_name {
if config.ignore_rendering.contains(tag_type) {
follow_child = false;
} else {
match tag_type {
h @ H1 | h @ H2 | h @ H3 | h @ H4 | h @ H5 | h @ H6 => {
tail.push('\n');
match h {
H1 => res.push_str("# "),
H2 => res.push_str("## "),
H3 => res.push_str("### "),
H4 => res.push_str("#### "),
H5 => res.push_str("##### "),
H6 => res.push_str("###### "),
_ => (),
}
}
Strong => {
res.push_str("**");
tail.push_str("**");
}
Em => {
res.push('*');
tail.push('*');
}
A => {
if let Some(link) = node.attributes.as_ref().and_then(|attrs| attrs.get_href())
{
let link = percent_encoding::percent_decode(link.as_bytes())
.decode_utf8()
.map(|s| s.to_string())
.unwrap_or(link);
res.push('[');
if link.contains(' ') {
tail.push_str(&format!("](<{}>)", link));
} else {
tail.push_str(&format!("]({})", link));
}
} else {
res.push('[');
tail.push(']');
}
}
Ul => {
for child in &node.children {
res.push_str(&child.leading_spaces());
res.push_str("- ");
res.push_str(&to_md(child.clone()));
}
follow_child = false;
}
Ol => {
let mut i = node
.attributes
.as_ref()
.and_then(|attrs| attrs.get("start"))
.and_then(|start| match start {
AttributeValues::String(start) => start.parse::<usize>().ok(),
AttributeValues::Number(start) => Some(start as usize),
_ => None,
})
.unwrap_or(1);
for child in &node.children {
res.push_str(&child.leading_spaces());
res.push_str(&format!("{}. ", i));
res.push_str(&to_md(child.clone()));
i += 1;
}
follow_child = false;
}
Li => {
if !&node.children.iter().any(|child| child.tag_name == Some(P)) {
tail.push('\n');
}
}
P => {
if node.children.is_empty() {
return res;
}
tail.push('\n');
}
Code => {
if let Some(language) = node
.attributes
.as_ref()
.and_then(|attr| attr.get_class())
.unwrap_or(&"".to_string())
.split_whitespace()
.find(|class| class.starts_with("language-"))
.map(|class| &class[9..])
{
res.push_str(&format!("```{}", language));
} else {
res.push_str("```\n");
}
tail.push_str("```\n");
}
Hr => {
res.push_str("***\n");
follow_child = false;
}
Br => {
res.push_str(" \n");
follow_child = false;
}
Text => {
if let Some(special_tags) = &node.within_special_tag {
if special_tags.contains(&Blockquote) {
res.push_str("> ");
}
}
res.push_str(&node.value.unwrap_or("".to_string()));
return res;
}
Html | Head | Style | Link | Script | Meta | Body | Div | Pre | Blockquote => (),
Title => {
follow_child = false;
}
Comment => {
res.push_str(&format!("<!--{}-->", &node.value.unwrap_or("".to_string())));
return res;
}
Unknown(tag) => {
res.push_str(&format!("<{}>", tag));
tail.push_str(&format!("</{}>", tag));
}
}
}
}
if follow_child {
for child in node.children {
res.push_str(&to_md_with_config(child, config));
}
}
res.push_str(&tail);
res
}
#[test]
fn issue34() {
let input = "<p><a href=\"/my uri\">link</a></p>";
let expected = "[link](</my uri>)\n";
assert_eq!(safe_from_html_to_md(input.to_string()).unwrap(), expected);
let input = "<p><a href=\"/myuri\">link</a></p>";
let expected = "[link](/myuri)\n";
assert_eq!(safe_from_html_to_md(input.to_string()).unwrap(), expected);
}
#[deprecated(
since = "0.7.0",
note = "This function is deprecated and will be removed in future versions. Please use the safe_parse_html function instead."
)]
#[allow(deprecated)]
pub fn from_html_to_md(input: String) -> String {
to_md(crate::parser::parse_html(input))
}
pub fn safe_from_html_to_md(input: String) -> Result<String, ParseHTMLError> {
crate::parser::safe_parse_html(input).map(to_md)
}
pub fn safe_from_html_to_md_with_config(
input: String,
config: &ToMdConfig,
) -> Result<String, ParseHTMLError> {
crate::parser::safe_parse_html(input).map(|html| to_md_with_config(html, config))
}