html2md_rs/to_md.rs
1//! This module contains functions that converts a Node to a markdown string.
2
3use crate::{
4 parser::ParseHTMLError,
5 structs::{AttributeValues, Node, NodeType::*, ToMdConfig},
6};
7
8/// Converts a Node to a markdown string.
9///
10/// # Arguments
11///
12/// * `node` - A Node to be converted to markdown.
13///
14/// # Examples
15///
16/// ```
17/// use html2md_rs::{
18/// structs::{
19/// Node,
20/// NodeType::{Text, H1},
21/// },
22/// to_md::to_md,
23/// };
24///
25/// let input = Node {
26/// tag_name: Some(H1),
27/// value: None,
28/// attributes: None,
29/// within_special_tag: None,
30/// children: vec![Node {
31/// tag_name: Some(Text),
32/// value: Some("Hello world".to_string()),
33/// attributes: None,
34/// within_special_tag: None,
35/// children: Vec::new(),
36/// }],
37/// };
38/// let parsed = to_md(input);
39///
40/// assert_eq!(parsed, "# Hello world\n");
41/// ```
42pub fn to_md(node: Node) -> String {
43 to_md_with_config(node, &ToMdConfig::default())
44}
45
46/// Converts a Node to a markdown string with custom config.
47///
48/// # Arguments
49///
50/// * `node` - A `Node` to be converted to markdown.
51/// * `config` - A custom configuration, `ToMdConfig`, to use to configure how to render the output markdown.
52///
53/// # Example's
54/// ```
55/// use html2md_rs::{
56/// structs::{
57/// Node,
58/// NodeType::{Div, Text, H1, P},
59/// ToMdConfig,
60/// },
61/// to_md::to_md_with_config,
62/// };
63///
64/// let input = Node {
65/// tag_name: Some(Div),
66/// children: vec![
67/// Node {
68/// tag_name: Some(H1),
69/// children: vec![Node {
70/// tag_name: Some(Text),
71/// value: Some("Hello world".to_string()),
72/// ..Default::default()
73/// }],
74/// ..Default::default()
75/// },
76/// Node {
77/// tag_name: Some(P),
78/// children: vec![Node {
79/// tag_name: Some(Text),
80/// value: Some("This will be ignored".to_string()),
81/// ..Default::default()
82/// }],
83/// ..Default::default()
84/// },
85/// ],
86/// ..Default::default()
87/// };
88/// let config = ToMdConfig {
89/// ignore_rendering: vec![P],
90/// };
91/// let parsed = to_md_with_config(input, &config);
92///
93/// assert_eq!(parsed, "# Hello world\n");
94/// ```
95pub fn to_md_with_config(node: Node, config: &ToMdConfig) -> String {
96 let mut res = String::new();
97 let mut tail = String::new();
98
99 let mut follow_child = true; // If the function should process the children of the node, defaults to true. False for some tags; like <ul> and <ol>.
100
101 if let Some(tag_type) = &node.tag_name {
102 if config.ignore_rendering.contains(tag_type) {
103 follow_child = false;
104 } else {
105 match tag_type {
106 h @ H1 | h @ H2 | h @ H3 | h @ H4 | h @ H5 | h @ H6 => {
107 tail.push('\n');
108 match h {
109 H1 => res.push_str("# "),
110 H2 => res.push_str("## "),
111 H3 => res.push_str("### "),
112 H4 => res.push_str("#### "),
113 H5 => res.push_str("##### "),
114 H6 => res.push_str("###### "),
115 _ => (),
116 }
117 }
118 Strong => {
119 res.push_str("**");
120 tail.push_str("**");
121 }
122 Em => {
123 res.push('*');
124 tail.push('*');
125 }
126 A => {
127 if let Some(link) = node.attributes.as_ref().and_then(|attrs| attrs.get_href())
128 {
129 let link = percent_encoding::percent_decode(link.as_bytes())
130 .decode_utf8()
131 .map(|s| s.to_string())
132 .unwrap_or(link);
133
134 res.push('[');
135 if link.contains(' ') {
136 tail.push_str(&format!("](<{}>)", link));
137 } else {
138 tail.push_str(&format!("]({})", link));
139 }
140 } else {
141 res.push('[');
142 tail.push(']');
143 }
144 }
145 Ul => {
146 for child in &node.children {
147 res.push_str(&child.leading_spaces());
148 res.push_str("- ");
149 res.push_str(&to_md(child.clone()));
150 }
151 follow_child = false;
152 }
153 Ol => {
154 let mut i = node
155 .attributes
156 .as_ref()
157 .and_then(|attrs| attrs.get("start"))
158 .and_then(|start| match start {
159 AttributeValues::String(start) => start.parse::<usize>().ok(),
160 AttributeValues::Number(start) => Some(start as usize),
161 _ => None,
162 })
163 .unwrap_or(1);
164 for child in &node.children {
165 res.push_str(&child.leading_spaces());
166 res.push_str(&format!("{}. ", i));
167 res.push_str(&to_md(child.clone()));
168 i += 1;
169 }
170 follow_child = false;
171 }
172 Li => {
173 if !&node.children.iter().any(|child| child.tag_name == Some(P)) {
174 tail.push('\n');
175 }
176 }
177 P => {
178 if node.children.is_empty() {
179 return res;
180 }
181 tail.push('\n');
182 }
183 Code => {
184 if let Some(language) = node
185 .attributes
186 .as_ref()
187 .and_then(|attr| attr.get_class())
188 .unwrap_or(&"".to_string())
189 .split_whitespace()
190 .find(|class| class.starts_with("language-"))
191 .map(|class| &class[9..])
192 {
193 res.push_str(&format!("```{}", language));
194 } else {
195 res.push_str("```\n");
196 }
197 tail.push_str("```\n");
198 }
199 Hr => {
200 res.push_str("***\n");
201 follow_child = false;
202 }
203 Br => {
204 res.push_str(" \n");
205 follow_child = false;
206 }
207 Text => {
208 if let Some(special_tags) = &node.within_special_tag {
209 if special_tags.contains(&Blockquote) {
210 res.push_str("> ");
211 }
212 }
213 res.push_str(&node.value.unwrap_or("".to_string()));
214 return res;
215 }
216 Html | Head | Style | Link | Script | Meta | Body | Div | Pre | Blockquote => (),
217 Title => {
218 follow_child = false;
219 }
220 Comment => {
221 res.push_str(&format!("<!--{}-->", &node.value.unwrap_or("".to_string())));
222 return res;
223 }
224 Unknown(tag) => {
225 res.push_str(&format!("<{}>", tag));
226 tail.push_str(&format!("</{}>", tag));
227 }
228 }
229 }
230 }
231
232 if follow_child {
233 for child in node.children {
234 res.push_str(&to_md_with_config(child, config));
235 }
236 }
237
238 res.push_str(&tail);
239
240 res
241}
242
// Regression test for https://github.com/izyuumi/html2md-rs/issues/34:
// link destinations containing a space are wrapped in angle brackets,
// destinations without one are emitted as-is.
#[test]
fn issue34() {
    let cases = [
        ("<p><a href=\"/my uri\">link</a></p>", "[link](</my uri>)\n"),
        ("<p><a href=\"/myuri\">link</a></p>", "[link](/myuri)\n"),
    ];

    for &(html, markdown) in &cases {
        assert_eq!(safe_from_html_to_md(html.to_string()).unwrap(), markdown);
    }
}
254
255/// Converts a string of HTML to a markdown string.
256///
257/// Panics if the HTML is invalid.
258///
259/// # Arguments
260///
261/// * `input` - A string of HTML to be converted to markdown.
262///
263/// # Examples
264///
265/// ```
266/// use html2md_rs::to_md::from_html_to_md;
267///
268/// let input = "<h1>Hello world</h1>".to_string();
269/// let parsed = from_html_to_md(input);
270///
271/// assert_eq!(parsed, "# Hello world\n");
272/// ```
273#[deprecated(
274 since = "0.7.0",
275 note = "This function is deprecated and will be removed in future versions. Please use the safe_parse_html function instead."
276)]
277#[allow(deprecated)]
278pub fn from_html_to_md(input: String) -> String {
279 to_md(crate::parser::parse_html(input))
280}
281
282/// Safely converts a string of HTML to a markdown string.
283///
284/// Returns an error if the HTML is invalid.
285///
286/// # Arguments
287///
288/// * `input` - A string of HTML to be converted to markdown.
289///
290/// # Examples
291///
292/// ```
293/// use html2md_rs::to_md::safe_from_html_to_md;
294///
295/// let input = "<h1>Hello world</h1>".to_string();
296/// let parsed = safe_from_html_to_md(input);
297///
298/// assert_eq!(parsed, Ok("# Hello world\n".to_string()));
299/// ```
300pub fn safe_from_html_to_md(input: String) -> Result<String, ParseHTMLError> {
301 crate::parser::safe_parse_html(input).map(to_md)
302}
303
304/// Safely converts a string of HTML to a markdown string with custom config.
305///
306/// Returns an error if the HTML is invalid.
307///
308/// # Arguments
309///
310/// * `input` - A string of HTML to be converted to markdown.
311/// * `config` - Custom configuration `ToMdConfig`
312///
313/// # Examples
314///
315/// ```
316/// use html2md_rs::{
317/// structs::{NodeType::P, ToMdConfig},
318/// to_md::safe_from_html_to_md_with_config,
319/// };
320///
321/// let input = "<h1>Hello world</h1><p>this will not be rendered</p>".to_string();
322/// let config = ToMdConfig {
323/// ignore_rendering: vec![P],
324/// };
325/// let parsed = safe_from_html_to_md_with_config(input, &config);
326///
327/// assert_eq!(parsed, Ok("# Hello world\n".to_string()));
328/// ```
329pub fn safe_from_html_to_md_with_config(
330 input: String,
331 config: &ToMdConfig,
332) -> Result<String, ParseHTMLError> {
333 crate::parser::safe_parse_html(input).map(|html| to_md_with_config(html, config))
334}