html2md_bulletty/
lists.rs

1use super::TagHandler;
2use super::StructuredPrinter;
3
4use markup5ever_rcdom::Handle;
5
6/// gets all list elements registered by a `StructuredPrinter` in reverse order
7fn list_hierarchy(printer: &mut StructuredPrinter) -> Vec<&String> {
8    printer.parent_chain.iter().rev().filter(|&tag| tag == "ul" || tag == "ol" || tag == "menu").collect()
9}
10
11#[derive(Default)]
12pub struct ListHandler;
13
14impl TagHandler for ListHandler {
15
16    /// we're entering "ul" or "ol" tag, no "li" handling here
17    fn handle(&mut self, _tag: &Handle, printer: &mut StructuredPrinter) {
18        printer.insert_newline();
19
20        // insert an extra newline for non-nested lists
21        if list_hierarchy(printer).is_empty() {
22            printer.insert_newline();
23        }
24    }
25
26    /// indent now-ready list
27    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
28        printer.insert_newline();
29        printer.insert_newline();
30    }
31}
32
33#[derive(Default)]
34pub struct ListItemHandler {
35    start_pos: usize,
36    list_type: String
37}
38
39impl TagHandler for ListItemHandler {
40
41    fn handle(&mut self, _tag: &Handle, printer: &mut StructuredPrinter) {
42        {
43            let parent_lists = list_hierarchy(printer);
44            let nearest_parent_list = parent_lists.first();
45            if nearest_parent_list.is_none() {
46                // no parent list
47                // should not happen - html5ever cleans html input when parsing
48                return;
49            }
50
51            self.list_type = nearest_parent_list.unwrap().to_string();
52        }
53
54        if printer.data.chars().last() != Some('\n') {
55            // insert newline when declaring a list item only in case there isn't any newline at the end of text
56            printer.insert_newline();
57        }
58
59        let current_depth = printer.parent_chain.len();
60        let order = printer.siblings[&current_depth].len() + 1;
61        match self.list_type.as_ref() {
62            "ul" | "menu" => printer.append_str("* "), // unordered list: *, *, *
63            "ol" => printer.append_str(&(order.to_string() + ". ")), // ordered list: 1, 2, 3
64            _ => {} // never happens
65        }
66
67        self.start_pos = printer.data.len();
68    }
69
70    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
71        let padding = match self.list_type.as_ref() {
72            "ul" => 2,
73            "ol" => 3,
74            _ => 4
75        };
76
77        // need to cleanup leading newlines, <p> inside <li> should produce valid 
78        // list element, not an empty line
79        let index = self.start_pos;
80        while index < printer.data.len() {
81            if printer.data.bytes().nth(index) == Some(b'\n') || printer.data.bytes().nth(index) == Some(b' ') {
82                printer.data.remove(index);
83            } else {
84                break;
85            }
86        }
87
88        // non-nested indentation (padding). Markdown requires that all paragraphs in the
89        // list item except first should be indented with at least 1 space
90        let mut index = printer.data.len();
91        while index > self.start_pos {
92            if printer.data.bytes().nth(index) == Some(b'\n') {
93                printer.insert_str(index + 1, &" ".repeat(padding));
94            }
95            index -= 1;
96        }
97    }
98}