html2md_rs/
to_md.rs

//! This module contains functions that convert a Node to a markdown string.
2
3use crate::{
4    parser::ParseHTMLError,
5    structs::{AttributeValues, Node, NodeType::*, ToMdConfig},
6};
7
8/// Converts a Node to a markdown string.
9///
10/// # Arguments
11///
12/// * `node` - A Node to be converted to markdown.
13///
14/// # Examples
15///
16/// ```
17/// use html2md_rs::{
18///     structs::{
19///         Node,
20///         NodeType::{Text, H1},
21///     },
22///     to_md::to_md,
23/// };
24///
25/// let input = Node {
26///     tag_name: Some(H1),
27///     value: None,
28///     attributes: None,
29///     within_special_tag: None,
30///     children: vec![Node {
31///         tag_name: Some(Text),
32///         value: Some("Hello world".to_string()),
33///         attributes: None,
34///         within_special_tag: None,
35///         children: Vec::new(),
36///     }],
37/// };
38/// let parsed = to_md(input);
39///
40/// assert_eq!(parsed, "# Hello world\n");
41/// ```
42pub fn to_md(node: Node) -> String {
43    to_md_with_config(node, &ToMdConfig::default())
44}
45
46/// Converts a Node to a markdown string with custom config.
47///
48/// # Arguments
49///
50/// * `node` - A `Node` to be converted to markdown.
51/// * `config` - A custom configuration, `ToMdConfig`, to use to configure how to render the output markdown.
52///
53/// # Example's
54/// ```
55/// use html2md_rs::{
56///     structs::{
57///         Node,
58///         NodeType::{Div, Text, H1, P},
59///         ToMdConfig,
60///     },
61///     to_md::to_md_with_config,
62/// };
63///
64/// let input = Node {
65///     tag_name: Some(Div),
66///     children: vec![
67///         Node {
68///             tag_name: Some(H1),
69///             children: vec![Node {
70///                 tag_name: Some(Text),
71///                 value: Some("Hello world".to_string()),
72///                 ..Default::default()
73///             }],
74///             ..Default::default()
75///         },
76///         Node {
77///             tag_name: Some(P),
78///             children: vec![Node {
79///                 tag_name: Some(Text),
80///                 value: Some("This will be ignored".to_string()),
81///                 ..Default::default()
82///             }],
83///             ..Default::default()
84///         },
85///     ],
86///     ..Default::default()
87/// };
88/// let config = ToMdConfig {
89///     ignore_rendering: vec![P],
90/// };
91/// let parsed = to_md_with_config(input, &config);
92///
93/// assert_eq!(parsed, "# Hello world\n");
94/// ```
95pub fn to_md_with_config(node: Node, config: &ToMdConfig) -> String {
96    let mut res = String::new();
97    let mut tail = String::new();
98
99    let mut follow_child = true; // If the function should process the children of the node, defaults to true. False for some tags; like <ul> and <ol>.
100
101    if let Some(tag_type) = &node.tag_name {
102        if config.ignore_rendering.contains(tag_type) {
103            follow_child = false;
104        } else {
105            match tag_type {
106                h @ H1 | h @ H2 | h @ H3 | h @ H4 | h @ H5 | h @ H6 => {
107                    tail.push('\n');
108                    match h {
109                        H1 => res.push_str("# "),
110                        H2 => res.push_str("## "),
111                        H3 => res.push_str("### "),
112                        H4 => res.push_str("#### "),
113                        H5 => res.push_str("##### "),
114                        H6 => res.push_str("###### "),
115                        _ => (),
116                    }
117                }
118                Strong => {
119                    res.push_str("**");
120                    tail.push_str("**");
121                }
122                Em => {
123                    res.push('*');
124                    tail.push('*');
125                }
126                A => {
127                    if let Some(link) = node.attributes.as_ref().and_then(|attrs| attrs.get_href())
128                    {
129                        let link = percent_encoding::percent_decode(link.as_bytes())
130                            .decode_utf8()
131                            .map(|s| s.to_string())
132                            .unwrap_or(link);
133
134                        res.push('[');
135                        if link.contains(' ') {
136                            tail.push_str(&format!("](<{}>)", link));
137                        } else {
138                            tail.push_str(&format!("]({})", link));
139                        }
140                    } else {
141                        res.push('[');
142                        tail.push(']');
143                    }
144                }
145                Ul => {
146                    for child in &node.children {
147                        res.push_str(&child.leading_spaces());
148                        res.push_str("- ");
149                        res.push_str(&to_md(child.clone()));
150                    }
151                    follow_child = false;
152                }
153                Ol => {
154                    let mut i = node
155                        .attributes
156                        .as_ref()
157                        .and_then(|attrs| attrs.get("start"))
158                        .and_then(|start| match start {
159                            AttributeValues::String(start) => start.parse::<usize>().ok(),
160                            AttributeValues::Number(start) => Some(start as usize),
161                            _ => None,
162                        })
163                        .unwrap_or(1);
164                    for child in &node.children {
165                        res.push_str(&child.leading_spaces());
166                        res.push_str(&format!("{}. ", i));
167                        res.push_str(&to_md(child.clone()));
168                        i += 1;
169                    }
170                    follow_child = false;
171                }
172                Li => {
173                    if !&node.children.iter().any(|child| child.tag_name == Some(P)) {
174                        tail.push('\n');
175                    }
176                }
177                P => {
178                    if node.children.is_empty() {
179                        return res;
180                    }
181                    tail.push('\n');
182                }
183                Code => {
184                    if let Some(language) = node
185                        .attributes
186                        .as_ref()
187                        .and_then(|attr| attr.get_class())
188                        .unwrap_or(&"".to_string())
189                        .split_whitespace()
190                        .find(|class| class.starts_with("language-"))
191                        .map(|class| &class[9..])
192                    {
193                        res.push_str(&format!("```{}", language));
194                    } else {
195                        res.push_str("```\n");
196                    }
197                    tail.push_str("```\n");
198                }
199                Hr => {
200                    res.push_str("***\n");
201                    follow_child = false;
202                }
203                Br => {
204                    res.push_str("  \n");
205                    follow_child = false;
206                }
207                Text => {
208                    if let Some(special_tags) = &node.within_special_tag {
209                        if special_tags.contains(&Blockquote) {
210                            res.push_str("> ");
211                        }
212                    }
213                    res.push_str(&node.value.unwrap_or("".to_string()));
214                    return res;
215                }
216                Html | Head | Style | Link | Script | Meta | Body | Div | Pre | Blockquote => (),
217                Title => {
218                    follow_child = false;
219                }
220                Comment => {
221                    res.push_str(&format!("<!--{}-->", &node.value.unwrap_or("".to_string())));
222                    return res;
223                }
224                Unknown(tag) => {
225                    res.push_str(&format!("<{}>", tag));
226                    tail.push_str(&format!("</{}>", tag));
227                }
228            }
229        }
230    }
231
232    if follow_child {
233        for child in node.children {
234            res.push_str(&to_md_with_config(child, config));
235        }
236    }
237
238    res.push_str(&tail);
239
240    res
241}
242
// Regression test for https://github.com/izyuumi/html2md-rs/issues/34:
// a link target containing a space is wrapped in angle brackets, while a
// target without spaces is emitted as-is.
#[test]
fn issue34() {
    let cases = [
        ("<p><a href=\"/my uri\">link</a></p>", "[link](</my uri>)\n"),
        ("<p><a href=\"/myuri\">link</a></p>", "[link](/myuri)\n"),
    ];

    for &(html, markdown) in cases.iter() {
        let rendered = safe_from_html_to_md(html.to_string()).unwrap();
        assert_eq!(rendered, markdown);
    }
}
254
255/// Converts a string of HTML to a markdown string.
256///
257/// Panics if the HTML is invalid.
258///
259/// # Arguments
260///
261/// * `input` - A string of HTML to be converted to markdown.
262///
263/// # Examples
264///
265/// ```
266/// use html2md_rs::to_md::from_html_to_md;
267///
268/// let input = "<h1>Hello world</h1>".to_string();
269/// let parsed = from_html_to_md(input);
270///
271/// assert_eq!(parsed, "# Hello world\n");
272/// ```
273#[deprecated(
274    since = "0.7.0",
275    note = "This function is deprecated and will be removed in future versions. Please use the safe_parse_html function instead."
276)]
277#[allow(deprecated)]
278pub fn from_html_to_md(input: String) -> String {
279    to_md(crate::parser::parse_html(input))
280}
281
282/// Safely converts a string of HTML to a markdown string.
283///
284/// Returns an error if the HTML is invalid.
285///
286/// # Arguments
287///
288/// * `input` - A string of HTML to be converted to markdown.
289///
290/// # Examples
291///
292/// ```
293/// use html2md_rs::to_md::safe_from_html_to_md;
294///
295/// let input = "<h1>Hello world</h1>".to_string();
296/// let parsed = safe_from_html_to_md(input);
297///
298/// assert_eq!(parsed, Ok("# Hello world\n".to_string()));
299/// ```
300pub fn safe_from_html_to_md(input: String) -> Result<String, ParseHTMLError> {
301    crate::parser::safe_parse_html(input).map(to_md)
302}
303
304/// Safely converts a string of HTML to a markdown string with custom config.
305///
306/// Returns an error if the HTML is invalid.
307///
308/// # Arguments
309///
310/// * `input` - A string of HTML to be converted to markdown.
311/// * `config` - Custom configuration `ToMdConfig`
312///
313/// # Examples
314///
315/// ```
316/// use html2md_rs::{
317///     structs::{NodeType::P, ToMdConfig},
318///     to_md::safe_from_html_to_md_with_config,
319/// };
320///
321/// let input = "<h1>Hello world</h1><p>this will not be rendered</p>".to_string();
322/// let config = ToMdConfig {
323///     ignore_rendering: vec![P],
324/// };
325/// let parsed = safe_from_html_to_md_with_config(input, &config);
326///
327/// assert_eq!(parsed, Ok("# Hello world\n".to_string()));
328/// ```
329pub fn safe_from_html_to_md_with_config(
330    input: String,
331    config: &ToMdConfig,
332) -> Result<String, ParseHTMLError> {
333    crate::parser::safe_parse_html(input).map(|html| to_md_with_config(html, config))
334}