html2md/rewriter/
handle.rs

1use super::anchors::{rewrite_anchor_element, rewrite_anchor_element_send};
2use super::iframes::{handle_iframe, handle_iframe_send};
3use super::images::{rewrite_image_element, rewrite_image_element_send};
4use super::lists::{handle_list_or_item, handle_list_or_item_send};
5use super::quotes::{rewrite_blockquote_element, rewrite_blockquote_element_send};
6use super::styles::{rewrite_style_element, rewrite_style_element_send};
7use super::{
8    insert_newline_after, insert_newline_after_send, insert_newline_before,
9    insert_newline_before_send,
10};
11use lol_html::html_content::ContentType::{Html, Text};
12use lol_html::html_content::Element;
13use std::rc::Rc;
14use std::sync::atomic::AtomicUsize;
15use std::sync::Arc;
16use url::Url;
17
18/// Handle the lol_html tag.
19#[inline]
20pub fn handle_tag(
21    element: &mut Element,
22    commonmark: bool,
23    url: &Option<Url>,
24    mut list_type: &mut Option<String>,
25    order_counter: &mut usize,
26    quote_depth: Rc<AtomicUsize>,
27    inside_table: &mut bool,
28) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
29    let element_name = element.tag_name();
30
31    let remove_attrs =
32        commonmark && (element_name.as_str() == "sub" || element_name.as_str() == "sup");
33
34    // check common mark includes.
35    if remove_attrs {
36        let attrs = element
37            .attributes()
38            .iter()
39            .map(|f| f.name())
40            .collect::<Vec<String>>();
41
42        for attr in attrs.iter() {
43            element.remove_attribute(&attr);
44        }
45    } else {
46        element.remove_and_keep_content();
47    }
48
49    // Add the markdown equivalents before the element.
50    match element_name.as_str() {
51        "h1" => {
52            element.before("# ", Text);
53            insert_newline_after(element);
54        }
55        "h2" => {
56            element.before("## ", Text);
57            insert_newline_after(element);
58        }
59        "h3" => {
60            element.before("### ", Text);
61            insert_newline_after(element);
62        }
63        "h4" => {
64            element.before("#### ", Text);
65            insert_newline_after(element);
66        }
67        "h5" => {
68            element.before("##### ", Text);
69            insert_newline_after(element);
70        }
71        "h6" => {
72            element.before("###### ", Text);
73            insert_newline_after(element);
74        }
75        "p" => {
76            insert_newline_before(element);
77            insert_newline_after(element);
78        }
79        "hr" => {
80            insert_newline_before(element);
81            element.append("---", Text);
82            insert_newline_after(element);
83        }
84        "br" => insert_newline_after(element),
85        "a" => {
86            let _ = rewrite_anchor_element(element, commonmark, url);
87        }
88        "img" => {
89            let _ = rewrite_image_element(element, commonmark, &url);
90        }
91        "table" => {
92            *inside_table = true;
93        }
94        "tr" => {
95            insert_newline_after(element);
96        }
97        "th" => {
98            // add the first table row start
99            if *inside_table {
100                element.before("|", Html);
101                *inside_table = false;
102            }
103            if commonmark {
104                element.before("** ", Html);
105                element.after("** |", Html);
106            } else {
107                element.after("|", Html);
108            }
109        }
110        "td" => {
111            element.after("|", Html);
112        }
113        "iframe" => {
114            let _ = handle_iframe(element);
115        }
116        "b" | "i" | "s" | "strong" | "em" | "del" => {
117            let _ = rewrite_style_element(element);
118        }
119        "ol" | "ul" | "menu" | "li" => {
120            let _ = handle_list_or_item(element, &mut list_type, order_counter);
121        }
122        "q" | "cite" | "blockquote" => {
123            let _ = rewrite_blockquote_element(element, quote_depth);
124        }
125        "div" | "section" | "header" | "footer" => {
126            insert_newline_before(element);
127            insert_newline_after(element);
128        }
129        "pre" => {
130            element.before("\n```\n", Html);
131            element.after("\n```\n", Html);
132        }
133        "code" | "samp" => {
134            element.before("`", Html);
135            element.after("`", Html);
136        }
137        _ => (),
138    }
139
140    Ok(())
141}
142
143/// Handle the lol_html tag.
144#[inline]
145pub fn handle_tag_send(
146    element: &mut lol_html::send::Element,
147    commonmark: bool,
148    url: &Option<Url>,
149    list_type: &mut Option<String>,
150    order_counter: &mut usize,
151    quote_depth: Arc<AtomicUsize>,
152    inside_table: &mut bool,
153) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
154    let element_name = element.tag_name();
155
156    let remove_attrs =
157        commonmark && (element_name.as_str() == "sub" || element_name.as_str() == "sup");
158
159    // check common mark includes.
160    if remove_attrs {
161        let attrs = element
162            .attributes()
163            .iter()
164            .map(|f| f.name())
165            .collect::<Vec<String>>();
166
167        for attr in attrs.iter() {
168            element.remove_attribute(&attr);
169        }
170    } else {
171        element.remove_and_keep_content();
172    }
173
174    // Add the markdown equivalents before the element.
175    match element_name.as_str() {
176        "h1" => {
177            element.before("# ", Text);
178            insert_newline_after_send(element);
179        }
180        "h2" => {
181            element.before("## ", Text);
182            insert_newline_after_send(element);
183        }
184        "h3" => {
185            element.before("### ", Text);
186            insert_newline_after_send(element);
187        }
188        "h4" => {
189            element.before("#### ", Text);
190            insert_newline_after_send(element);
191        }
192        "h5" => {
193            element.before("##### ", Text);
194            insert_newline_after_send(element);
195        }
196        "h6" => {
197            element.before("###### ", Text);
198            insert_newline_after_send(element);
199        }
200        "p" => {
201            insert_newline_before_send(element);
202            insert_newline_after_send(element);
203        }
204        "hr" => {
205            insert_newline_before_send(element);
206            element.append("---", Text);
207            insert_newline_after_send(element);
208        }
209        "br" => insert_newline_after_send(element),
210        "a" => {
211            let _ = rewrite_anchor_element_send(element, commonmark, url);
212        }
213        "img" => {
214            let _ = rewrite_image_element_send(element, commonmark, &url);
215        }
216        "table" => *inside_table = true,
217        "tr" => {
218            insert_newline_after_send(element);
219        }
220        "th" => {
221            if *inside_table {
222                element.before("|", Html);
223                *inside_table = false
224            }
225            if commonmark {
226                element.before("** ", Html);
227                element.after("** |", Html);
228            } else {
229                element.after("|", Html);
230            }
231        }
232        "td" => {
233            element.after("|", Html);
234        }
235        "iframe" => {
236            let _ = handle_iframe_send(element);
237        }
238        "b" | "i" | "s" | "strong" | "em" | "del" => {
239            let _ = rewrite_style_element_send(element);
240        }
241        "ol" | "ul" | "menu" | "li" => {
242            let _ = handle_list_or_item_send(element, list_type, order_counter);
243        }
244        "q" | "cite" | "blockquote" => {
245            let _ = rewrite_blockquote_element_send(element, quote_depth.clone());
246        }
247        "div" | "section" | "header" | "footer" => {
248            insert_newline_before_send(element);
249            insert_newline_after_send(element);
250        }
251        "pre" => {
252            element.before("\n```\n", Html);
253            element.after("\n```\n", Html);
254        }
255        "code" | "samp" => {
256            element.before("`", Html);
257            element.after("`", Html);
258        }
259        _ => (),
260    }
261
262    Ok(())
263}