html2md/
lists.rs

1use super::StructuredPrinter;
2use super::TagHandler;
3
4use markup5ever_rcdom::Handle;
5
6/// gets all list elements registered by a `StructuredPrinter` in reverse order
7fn list_hierarchy(printer: &mut StructuredPrinter) -> Vec<&String> {
8    printer
9        .parent_chain
10        .iter()
11        .rev()
12        .filter(|&tag| tag == "ul" || tag == "ol" || tag == "menu")
13        .collect()
14}
15
16#[derive(Default)]
17pub struct ListHandler;
18
19impl TagHandler for ListHandler {
20    /// we're entering "ul" or "ol" tag, no "li" handling here
21    fn handle(&mut self, _tag: &Handle, printer: &mut StructuredPrinter) {
22        printer.insert_newline();
23
24        // insert an extra newline for non-nested lists
25        if list_hierarchy(printer).is_empty() {
26            printer.insert_newline();
27        }
28    }
29
30    /// indent now-ready list
31    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
32        printer.insert_newline();
33        printer.insert_newline();
34    }
35}
36
37#[derive(Default)]
38pub struct ListItemHandler {
39    start_pos: usize,
40    list_type: String,
41}
42
43impl TagHandler for ListItemHandler {
44    fn handle(&mut self, _tag: &Handle, printer: &mut StructuredPrinter) {
45        {
46            let parent_lists = list_hierarchy(printer);
47            let nearest_parent_list = parent_lists.first();
48            if nearest_parent_list.is_none() {
49                // no parent list
50                // should not happen - html5ever cleans html input when parsing
51                return;
52            }
53
54            self.list_type = nearest_parent_list.unwrap().to_string();
55        }
56
57        if !printer.data.ends_with('\n') {
58            // insert newline when declaring a list item only in case there isn't any newline at the end of text
59            printer.insert_newline();
60        }
61
62        let current_depth = printer.parent_chain.len();
63        let order = printer.siblings[&current_depth].len() + 1;
64        match self.list_type.as_ref() {
65            "ul" | "menu" => printer.append_str("* "), // unordered list: *, *, *
66            "ol" => printer.append_str(&(order.to_string() + ". ")), // ordered list: 1, 2, 3
67            _ => {}                                    // never happens
68        }
69
70        self.start_pos = printer.data.len();
71    }
72
73    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
74        let padding = match self.list_type.as_ref() {
75            "ul" => 2,
76            "ol" => 3,
77            _ => 4,
78        };
79
80        // need to cleanup leading newlines, <p> inside <li> should produce valid
81        // list element, not an empty line
82        let index = self.start_pos;
83        while index < printer.data.len() {
84            if printer.data.as_bytes().get(index) == Some(&b'\n')
85                || printer.data.as_bytes().get(index) == Some(&b' ')
86            {
87                printer.data.remove(index);
88            } else {
89                break;
90            }
91        }
92
93        // non-nested indentation (padding). Markdown requires that all paragraphs in the
94        // list item except first should be indented with at least 1 space
95        let mut index = printer.data.len();
96        while index > self.start_pos {
97            if printer.data.as_bytes().get(index) == Some(&b'\n') {
98                printer.insert_str(index + 1, &" ".repeat(padding));
99            }
100            index -= 1;
101        }
102    }
103}