html2md/
tables.rs

1use super::{walk, clean_markdown};
2use super::TagHandler;
3use super::StructuredPrinter;
4
5use std::{collections::HashMap, cmp};
6
7use markup5ever_rcdom::{Handle,NodeData};
8
9#[derive(Default)]
10pub struct TableHandler;
11
12impl TagHandler for TableHandler {
13
14    fn handle(&mut self, tag: &Handle, printer: &mut StructuredPrinter) {
15        let mut table_markup = String::new();
16        let any_matcher = |cell: &Handle| { let name = tag_name(cell); name == "td" || name == "th" };
17
18        // detect cell width, counts
19        let column_count : usize;
20        let mut column_widths : Vec<usize>;
21        let rows = find_children(tag, "tr");
22        {
23            // detect row count
24            let most_big_row = rows.iter().max_by(|left, right| collect_children(&left, any_matcher).len().cmp(&collect_children(&right, any_matcher).len()));
25            if most_big_row.is_none() {
26                // we don't have rows with content at all
27                return;
28            }
29            // have rows with content, set column count
30            column_count = collect_children(&most_big_row.unwrap(), any_matcher).len();
31            column_widths = vec![3; column_count];
32
33            // detect max column width
34            for row in &rows {
35                let cells = collect_children(row, any_matcher);
36                for index in 0..column_count {
37                    // from regular rows
38                    if let Some(cell) = cells.get(index) {
39                        let text = to_text(cell);
40                        column_widths[index] = cmp::max(column_widths[index], text.chars().count());
41                    }
42                }
43            }
44        }
45
46        // header row must always be present
47        for (idx, row) in rows.iter().enumerate() {
48            table_markup.push('|');
49            let cells = collect_children(row, any_matcher);
50            for index in 0..column_count { // we need to fill all cells in a column, even if some rows don't have enough
51                let padded_cell_text = pad_cell_text(&cells.get(index), column_widths[index]);
52                table_markup.push_str(&padded_cell_text);
53                table_markup.push('|');
54            }
55            table_markup.push('\n');
56
57            if idx == 0 {
58                // first row is a header row
59                // add header-body divider row
60                table_markup.push('|');
61                for index in 0..column_count {
62                    let width = column_widths[index];
63                    if width < 3 {
64                        // no point in aligning, just post as-is
65                        table_markup.push_str(&"-".repeat(width));
66                        table_markup.push('|');
67                        continue;
68                    }
69
70                    // try to detect alignment
71                    let mut alignment = String::new();
72                    if let Some(header_cell) = cells.get(index) {
73                        // we have a header, try to extract alignment from it
74                        alignment = match header_cell.data {
75                            NodeData::Element { ref attrs, .. } => {
76                                let attrs = attrs.borrow();
77                                let align_attr = attrs.iter().find(|attr| attr.name.local.to_string() == "align");
78                                align_attr.map(|attr| attr.value.to_string()).unwrap_or_default()
79                            }
80                            _ => String::new()
81                        };
82                    }
83
84                    // push lines according to alignment, fallback to default behaviour
85                    match alignment.as_ref() {
86                        "left" => { table_markup.push(':'); table_markup.push_str(&"-".repeat(width - 1)); }
87                        "center" => { table_markup.push(':'); table_markup.push_str(&"-".repeat(width - 2)); table_markup.push(':'); }
88                        "right" => { table_markup.push_str(&"-".repeat(width - 1)); table_markup.push(':'); }
89                        _ => table_markup.push_str(&"-".repeat(width))
90                    }
91                    table_markup.push('|');
92                }
93                table_markup.push('\n');
94            }
95        }
96
97        printer.insert_newline();
98        printer.insert_newline();
99        printer.append_str(&table_markup);
100    }
101
102    fn after_handle(&mut self, _printer: &mut StructuredPrinter) {
103
104    }
105
106    fn skip_descendants(&self) -> bool {
107        return true;
108    }
109}
110
111/// Pads cell text from right and left so it looks centered inside the table cell
112/// ### Arguments
113/// `tag` - optional reference to currently processed handle, text is extracted from here
114///
115/// `column_width` - precomputed column width to compute padding length from
116fn pad_cell_text(tag: &Option<&Handle>, column_width: usize) -> String {
117    let mut result = String::new();
118    if let Some(cell) = tag {
119        // have header at specified position
120        let text = to_text(cell);
121        // compute difference between width and text length
122        let len_diff = column_width - text.chars().count();
123        if len_diff > 0 {
124            // should pad
125            if len_diff > 1 {
126                // should pad from both sides
127                let pad_len = len_diff / 2;
128                let remainder = len_diff % 2;
129                result.push_str(&" ".repeat(pad_len));
130                result.push_str(&text);
131                result.push_str(&" ".repeat(pad_len + remainder));
132            } else {
133                // it's just one space, add at the end
134                result.push_str(&text);
135                result.push(' ');
136            }
137        } else {
138            // shouldn't pad, text fills whole cell
139            result.push_str(&text);
140        }
141    } else {
142        // no text in this cell, fill cell with spaces
143        let pad_len = column_width;
144        result.push_str(&" ".repeat(pad_len));
145    }
146
147    return result;
148}
149
150/// Extracts tag name from passed tag
151/// Returns empty string if it's not an html element
152fn tag_name(tag: &Handle) -> String {
153    return match tag.data {
154        NodeData::Element { ref name, .. } => name.local.to_string(),
155        _ => String::new()
156    }
157}
158
159/// Find descendants of this tag with tag name `name`
160/// This includes both direct children and descendants
161fn  find_children(tag: &Handle, name: &str) -> Vec<Handle> {
162    let mut result: Vec<Handle> = vec![];
163    let children = tag.children.borrow();
164    for child in children.iter() {
165        if tag_name(&child) == name {
166            result.push(child.clone());
167        }
168
169        let mut descendants = find_children(&child, name);
170        result.append(&mut descendants);
171    }
172
173    return result;
174}
175
176/// Collect direct children that satisfy the predicate
177/// This doesn't include descendants
178fn collect_children<P>(tag: &Handle, predicate: P) -> Vec<Handle>
179where P: Fn(&Handle) -> bool {
180    let mut result: Vec<Handle> = vec![];
181    let children = tag.children.borrow();
182    for child in children.iter() {
183        let candidate = child.clone();
184        if predicate(&candidate) {
185            result.push(candidate);
186        }
187    }
188
189    return result;
190}
191
192/// Convert html tag to text. This collects all tag children in correct order where they're observed
193/// and concatenates their text, recursively.
194fn  to_text(tag: &Handle) -> String {
195    let mut printer = StructuredPrinter::default();
196    walk(tag, &mut printer, &HashMap::default());
197
198
199    let result = clean_markdown(&printer.data);
200    return result.replace("\n", "<br/>");
201}