html2md/
lists.rs

1use crate::markup5ever_rcdom;
2
3use super::StructuredPrinter;
4use super::TagHandler;
5
6use markup5ever_rcdom::Handle;
7
8/// gets all list elements registered by a `StructuredPrinter` in reverse order
9fn list_hierarchy(printer: &mut StructuredPrinter) -> Vec<&String> {
10    printer
11        .parent_chain
12        .iter()
13        .rev()
14        .filter(|&tag| tag == "ul" || tag == "ol" || tag == "menu")
15        .collect()
16}
17
18#[derive(Default)]
19pub struct ListHandler;
20
21impl TagHandler for ListHandler {
22    /// we're entering "ul" or "ol" tag, no "li" handling here
23    fn handle(&mut self, _tag: &Handle, printer: &mut StructuredPrinter) {
24        printer.insert_newline();
25
26        // insert an extra newline for non-nested lists
27        if list_hierarchy(printer).is_empty() {
28            printer.insert_newline();
29        }
30    }
31
32    /// indent now-ready list
33    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
34        printer.insert_newline();
35        printer.insert_newline();
36    }
37}
38
39#[derive(Default)]
40pub struct ListItemHandler {
41    start_pos: usize,
42    list_type: String,
43}
44
45impl TagHandler for ListItemHandler {
46    fn handle(&mut self, _tag: &Handle, printer: &mut StructuredPrinter) {
47        {
48            let parent_lists = list_hierarchy(printer);
49            let nearest_parent_list = parent_lists.first();
50            if nearest_parent_list.is_none() {
51                // no parent list
52                // should not happen - html5ever cleans html input when parsing
53                return;
54            }
55
56            self.list_type = nearest_parent_list.unwrap().to_string();
57        }
58
59        if !printer.data.ends_with('\n') {
60            // insert newline when declaring a list item only in case there isn't any newline at the end of text
61            printer.insert_newline();
62        }
63
64        let current_depth = printer.parent_chain.len();
65        let order = printer.siblings[&current_depth].len() + 1;
66        match self.list_type.as_ref() {
67            "ul" | "menu" => printer.append_str("* "), // unordered list: *, *, *
68            "ol" => printer.append_str(&(order.to_string() + ". ")), // ordered list: 1, 2, 3
69            _ => {}                                    // never happens
70        }
71
72        self.start_pos = printer.data.len();
73    }
74
75    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
76        let padding = match self.list_type.as_ref() {
77            "ul" => 2,
78            "ol" => 3,
79            _ => 4,
80        };
81
82        // need to clean up leading newlines, <p> inside <li> should produce
83        // valid list element, not an empty line
84        let index = self.start_pos;
85        while index < printer.data.len() {
86            if printer.data.as_bytes().get(index) == Some(&b'\n')
87                || printer.data.as_bytes().get(index) == Some(&b' ')
88            {
89                printer.data.remove(index);
90            } else {
91                break;
92            }
93        }
94
95        // non-nested indentation (padding). Markdown requires that all
96        // paragraphs in the list item except first should be indented with at
97        // least 1 space
98        let mut index = printer.data.len();
99        while index > self.start_pos {
100            if printer.data.as_bytes().get(index) == Some(&b'\n') {
101                printer.insert_str(index + 1, &" ".repeat(padding));
102            }
103            index -= 1;
104        }
105    }
106}