markdown_it/plugins/extra/
tables.rs

1//! GFM tables
2//!
3//! <https://github.github.com/gfm/#tables-extension->
4use crate::common::sourcemap::SourcePos;
5use crate::parser::block::{BlockRule, BlockState};
6use crate::parser::extset::RenderExt;
7use crate::parser::inline::InlineRoot;
8use crate::plugins::cmark::block::heading::HeadingScanner;
9use crate::plugins::cmark::block::list::ListScanner;
10use crate::{MarkdownIt, Node, NodeValue, Renderer};
11
12#[derive(Debug)]
13pub struct Table {
14    pub alignments: Vec<ColumnAlignment>,
15}
16
17impl NodeValue for Table {
18    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
19        let old_context = fmt.ext().remove::<TableRenderContext>();
20        fmt.ext().insert(TableRenderContext { head: false, alignments: self.alignments.clone(), index: 0 });
21
22        fmt.cr();
23        fmt.open("table", &node.attrs);
24        fmt.cr();
25        fmt.contents(&node.children);
26        fmt.cr();
27        fmt.close("table");
28        fmt.cr();
29
30        old_context.map(|ctx| fmt.ext().insert(ctx));
31    }
32}
33
/// Per-table state kept in the renderer extensions (`fmt.ext()`) while a
/// table is being rendered.
#[derive(Debug, Default)]
pub struct TableRenderContext {
    /// Set by `TableHead` while its children are rendered; cells render as
    /// `<th>` when it is `true` and as `<td>` otherwise.
    pub head: bool,
    /// Index of the next cell within the current row; `TableRow` resets it to
    /// zero and `TableCell` increments it.
    pub index: usize,
    /// Column alignments of the table currently being rendered.
    pub alignments: Vec<ColumnAlignment>,
}

// Empty impl; presumably what allows the type to be stored via `fmt.ext()` -
// confirm against `parser::extset`.
impl RenderExt for TableRenderContext {}
42
43#[derive(Debug)]
44pub struct TableHead;
45
46impl NodeValue for TableHead {
47    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
48        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
49        ctx.head = true;
50
51        fmt.cr();
52        fmt.open("thead", &node.attrs);
53        fmt.cr();
54        fmt.contents(&node.children);
55        fmt.cr();
56        fmt.close("thead");
57        fmt.cr();
58
59        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
60        ctx.head = false;
61    }
62}
63
64#[derive(Debug)]
65pub struct TableBody;
66
67impl NodeValue for TableBody {
68    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
69        fmt.cr();
70        fmt.open("tbody", &node.attrs);
71        fmt.cr();
72        fmt.contents(&node.children);
73        fmt.cr();
74        fmt.close("tbody");
75        fmt.cr();
76    }
77}
78
79#[derive(Debug)]
80pub struct TableRow;
81
82impl NodeValue for TableRow {
83    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
84        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
85        ctx.index = 0;
86
87        fmt.cr();
88        fmt.open("tr", &node.attrs);
89        fmt.cr();
90        fmt.contents(&node.children);
91        fmt.cr();
92        fmt.close("tr");
93        fmt.cr();
94    }
95}
96
97#[derive(Debug)]
98pub struct TableCell;
99
100impl NodeValue for TableCell {
101    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
102        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
103        let tag = if ctx.head { "th" } else { "td" };
104
105        let mut attrs = node.attrs.clone();
106
107        match ctx.alignments.get(ctx.index).copied().unwrap_or_default() {
108            ColumnAlignment::None => (),
109            ColumnAlignment::Left => attrs.push(("style", "text-align:left".to_owned())),
110            ColumnAlignment::Right => attrs.push(("style", "text-align:right".to_owned())),
111            ColumnAlignment::Center => attrs.push(("style", "text-align:center".to_owned())),
112        }
113
114        ctx.index += 1;
115
116        fmt.open(tag, &attrs);
117        fmt.contents(&node.children);
118        fmt.close(tag);
119        fmt.cr();
120    }
121}
122
/// Registers the GFM table block rule on `md`.
///
/// The rule is ordered before both the list rule and the heading rule.
pub fn add(md: &mut MarkdownIt) {
    md.block.add_rule::<TableScanner>()
        .before::<ListScanner>()
        .before::<HeadingScanner>();
}
128
/// Block rule that recognises GFM tables; registered on a parser by `add`.
#[doc(hidden)]
pub struct TableScanner;
131
/// One cell as produced by `TableScanner::scan_row`.
#[derive(Debug)]
struct RowContent {
    /// Cell text without leading/trailing spaces and tabs, and with each
    /// escaped pipe (`\|`) reduced to a plain `|`.
    str: String,
    /// `(byte offset in str, byte offset in the scanned line)` pairs. The
    /// first entry points at the start of the cell content (after leading
    /// whitespace); one further entry is added for every escaped pipe.
    srcmap: Vec<(usize, usize)>,
}
137
/// Horizontal alignment of a table column, as declared in the delimiter row.
#[derive(Debug, Clone, Copy)]
pub enum ColumnAlignment {
    /// No colon in the delimiter cell (`---`); no `style` attribute is emitted.
    None,
    /// Leading colon only (`:---`).
    Left,
    /// Trailing colon only (`---:`).
    Right,
    /// Colons on both sides (`:---:`).
    Center,
}

impl Default for ColumnAlignment {
    /// A column is unaligned unless stated otherwise.
    fn default() -> Self {
        ColumnAlignment::None
    }
}
149
150impl TableScanner {
151    fn scan_row(line: &str) -> Vec<RowContent> {
152        let mut result = Vec::new();
153        let mut str = String::new();
154        let mut srcmap = vec![(0, 0)];
155        let mut is_escaped = false;
156        let mut is_leading = true;
157
158        for (pos, ch) in line.char_indices() {
159            match ch {
160                ' ' | '\t' if is_leading => {
161                    srcmap[0].1 += 1;
162                }
163                '|' => {
164                    is_leading = false;
165                    if is_escaped {
166                        str.push_str(&line[srcmap.last().unwrap().1..pos-1]);
167                        srcmap.push((str.len(), pos));
168                    } else {
169                        str.push_str(&line[srcmap.last().unwrap().1..pos]);
170                        result.push(RowContent {
171                            str: std::mem::take(&mut str),
172                            srcmap: std::mem::take(&mut srcmap),
173                        });
174                        srcmap = vec![(0, pos + 1)];
175                        is_escaped = false;
176                        is_leading = true;
177                    }
178                }
179                '\\' => {
180                    is_leading = false;
181                    is_escaped = true;
182                }
183                _ => {
184                    is_leading = false;
185                    is_escaped = false;
186                }
187            }
188        }
189
190        str.push_str(&line[srcmap.last().unwrap().1..]);
191        result.push(RowContent {
192            str,
193            srcmap,
194        });
195
196        // trim trailing spaces
197        for content in result.iter_mut() {
198            while content.str.ends_with([ ' ', '\t' ]) {
199                content.str.pop();
200            }
201        }
202
203        // remove last cell if empty
204        if let Some(RowContent { str, srcmap: _ }) = result.last() {
205            if str.is_empty() { result.pop(); }
206        }
207
208        // remove first cell if empty
209        if let Some(RowContent { str, srcmap: _ }) = result.first() {
210            if str.is_empty() { result.remove(0); }
211        }
212
213        result
214    }
215
216    fn scan_alignment_row(line: &str) -> Option<Vec<ColumnAlignment>> {
217        // quick check second line, only allow :-| and spaces
218        // (this is for performance only)
219        let mut has_delimiter = false;
220        for ch in line.chars() {
221            match ch {
222                '|'| ':' => { has_delimiter = true },      
223                '-' | ' ' | '\t' => (),
224                _ => return None,
225            }
226        }
227        if !has_delimiter { return None; }
228
229        // if first character is '-', then second character must not be a space
230        // (due to parsing ambiguity with list)
231        if line.starts_with("- ") { return None; }
232
233        let mut result = Vec::new();
234
235        for RowContent { str, srcmap: _ } in Self::scan_row(line) {
236            let mut alignment : u8 = 0;
237            let mut cell = str.as_str();
238
239            if cell.starts_with(':') {
240                alignment |= 1;
241                cell = &cell[1..];
242            }
243
244            if cell.ends_with(':') {
245                alignment |= 2;
246                cell = &cell[..cell.len()-1];
247            }
248
249            // only allow '-----' in the remainder
250            if cell.is_empty() || cell.contains(|c| c != '-') {
251                return None;
252            }
253
254            result.push(match alignment {
255                0 => ColumnAlignment::None,
256                1 => ColumnAlignment::Left,
257                2 => ColumnAlignment::Right,
258                3 => ColumnAlignment::Center,
259                _ => unreachable!(),
260            });
261        }
262
263        Some(result)
264    }
265
266    fn scan_header(state: &BlockState) -> Option<(Vec<RowContent>, Vec<ColumnAlignment>)> {
267        // should have at least two lines
268        if state.line + 2 > state.line_max { return None; }
269
270        if state.line_indent(state.line) >= state.md.max_indent { return None; }
271
272        let next_line = state.line + 1;
273        if state.line_indent(next_line) < 0 { return None; }
274
275        if state.line_indent(next_line) >= state.md.max_indent { return None; }
276
277        let alignments = Self::scan_alignment_row(state.get_line(next_line))?;
278        let header_row = Self::scan_row(state.get_line(state.line));
279
280        // header row must match the delimiter row in the number of cells
281        if header_row.len() != alignments.len() {
282            return None;
283        }
284
285        // table without any columns is not a table, see markdown-it#724
286        if header_row.is_empty() {
287            return None;
288        }
289
290        Some(( header_row, alignments ))
291    }
292}
293
294impl BlockRule for TableScanner {
295    fn check(state: &mut BlockState) -> Option<()> {
296        if state.node.is::<TableBody>() { return None; }
297
298        Self::scan_header(state).map(|_| ())
299    }
300
301    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
302        let ( header_row, alignments ) = Self::scan_header(state)?;
303        let table_cell_count = header_row.len();
304        let mut table_node = Node::new(Table { alignments });
305
306        let mut thead_node = Node::new(TableHead);
307        thead_node.srcmap = state.get_map(state.line, state.line + 1);
308
309        let mut row_node = Node::new(TableRow);
310        row_node.srcmap = state.get_map(state.line, state.line);
311
312        fn add_cell(row_node: &mut Node, cell: String, srcmap: Vec<(usize, usize)>) {
313            let mut cell_node = Node::new(TableCell);
314            let (start, _) = row_node.srcmap.unwrap().get_byte_offsets();
315            cell_node.srcmap = Some(SourcePos::new(
316                start + srcmap.first().unwrap().1,
317                start + srcmap.last().unwrap().1 + cell.len() - srcmap.last().unwrap().0,
318            ));
319            if !cell.is_empty() {
320                let mapping = srcmap.into_iter().map(|(dstpos, srcpos)| (dstpos, srcpos + start)).collect();
321                cell_node.children.push(Node::new(InlineRoot::new(cell, mapping)));
322            }
323            row_node.children.push(cell_node);
324        }
325
326        for RowContent { str: cell, srcmap } in header_row {
327            add_cell(&mut row_node, cell, srcmap);
328        }
329
330        thead_node.children.push(row_node);
331        table_node.children.push(thead_node);
332
333        let tbody_node = Node::new(TableBody);
334        let old_node = std::mem::replace(&mut state.node, tbody_node);
335
336        //
337        // Iterate table rows
338        //
339
340        let start_line = state.line;
341        state.line += 2;
342
343        while state.line < state.line_max {
344            //
345            // Try to check if table is terminated or continued.
346            //
347            if state.line_indent(state.line) < 0 { break; }
348
349            if state.line_indent(state.line) >= state.md.max_indent { break; }
350
351            // stop if the line is empty
352            if state.is_empty(state.line) { break; }
353
354            // fail if terminating block found
355            if state.test_rules_at_line() { break; }
356
357            let mut row_node = Node::new(TableRow);
358            row_node.srcmap = state.get_map(state.line, state.line);
359            let line = state.get_line(state.line);
360
361            let mut body_row = Self::scan_row(line);
362            let mut end_of_line = RowContent { str: String::new(), srcmap: vec![(0, line.len())] };
363
364            for index in 0..table_cell_count {
365                let RowContent { str: cell, srcmap } = body_row.get_mut(index).unwrap_or(&mut end_of_line);
366                add_cell(&mut row_node, cell.clone(), srcmap.clone());
367            }
368
369            state.node.children.push(row_node);
370            state.line += 1;
371        }
372
373        let mut tbody_node = std::mem::replace(&mut state.node, old_node);
374
375        if !tbody_node.children.is_empty() {
376            tbody_node.srcmap = state.get_map(start_line + 2, state.line - 1);
377            table_node.children.push(tbody_node);
378        }
379
380        let line_count = state.line - start_line;
381        state.line = start_line;
382        Some((table_node, line_count))
383    }
384}
385
386
// Unit tests for the row scanner, plus one end-to-end check of delimiter-row
// detection.
#[cfg(test)]
mod tests {
    use super::TableScanner;

    // Cells are separated by `|`; an empty line yields no cells.
    #[test]
    fn should_split_cells() {
        assert_eq!(TableScanner::scan_row("").len(), 0);
        assert_eq!(TableScanner::scan_row("a").len(), 1);
        assert_eq!(TableScanner::scan_row("a | b").len(), 2);
        assert_eq!(TableScanner::scan_row("a | b | c").len(), 3);
    }

    // Only one empty cell is dropped at each end of the row; further empty
    // cells are kept.
    #[test]
    fn should_ignore_leading_trailing_empty_cells() {
        assert_eq!(TableScanner::scan_row("foo | bar").len(), 2);
        assert_eq!(TableScanner::scan_row("foo | bar |").len(), 2);
        assert_eq!(TableScanner::scan_row("| foo | bar").len(), 2);
        assert_eq!(TableScanner::scan_row("| foo | bar |").len(), 2);
        assert_eq!(TableScanner::scan_row("| | foo | bar | |").len(), 4);
        assert_eq!(TableScanner::scan_row("|").len(), 0);
        assert_eq!(TableScanner::scan_row("||").len(), 1);
    }

    // Spaces and tabs around the cell content are removed.
    #[test]
    fn should_trim_cell_content() {
        assert_eq!(TableScanner::scan_row("|foo|")[0].str, "foo");
        assert_eq!(TableScanner::scan_row("| foo |")[0].str, "foo");
        assert_eq!(TableScanner::scan_row("|\tfoo\t|")[0].str, "foo");
        assert_eq!(TableScanner::scan_row("| \t foo \t |")[0].str, "foo");
    }

    // `\|` becomes `|` no matter how many backslashes precede it; other
    // backslashes are left untouched.
    #[test]
    fn should_process_backslash_escapes() {
        assert_eq!(TableScanner::scan_row(r#"| foo\bar |"#)[0].str, r#"foo\bar"#);
        assert_eq!(TableScanner::scan_row(r#"| foo\|bar |"#)[0].str, r#"foo|bar"#);
        assert_eq!(TableScanner::scan_row(r#"| foo\\|bar |"#)[0].str, r#"foo\|bar"#);
        assert_eq!(TableScanner::scan_row(r#"| foo\\\|bar |"#)[0].str, r#"foo\\|bar"#);
        assert_eq!(TableScanner::scan_row(r#"| foo\\\\|bar |"#)[0].str, r#"foo\\\|bar"#);
    }

    // The source map of a cell points at the first non-whitespace byte.
    #[test]
    fn should_trim_cell_content_srcmaps() {
        let row = TableScanner::scan_row("| foo | \tbar\t |");
        assert_eq!(row[0].str, "foo");
        assert_eq!(row[0].srcmap, vec![(0, 2)]);
        assert_eq!(row[1].str, "bar");
        assert_eq!(row[1].srcmap, vec![(0, 9)]);
    }

    // Every escaped pipe adds a (cell offset, line offset) entry to the source map.
    #[test]
    fn should_process_backslash_escapes_srcmaps() {
        let row = TableScanner::scan_row(r#"|  foo\\|bar\\\|baz\  |"#);
        assert_eq!(row[0].str, r#"foo\|bar\\|baz\"#);
        assert_eq!(row[0].srcmap, vec![(0, 3), (4, 8), (10, 15)]);
    }

    // A delimiter row made of dashes only does not start a table; it needs at
    // least one `|` or `:`.
    #[test]
    fn require_pipe_or_colon_in_align_row() {
        let md = &mut crate::MarkdownIt::new();
        crate::plugins::extra::tables::add(md);
        let html = md.parse("foo\n---\nbar").render();
        assert_eq!(html.trim(), "foo\n---\nbar");
        let html = md.parse("|foo\n---\nbar").render();
        assert_eq!(html.trim(), "|foo\n---\nbar");
        let html = md.parse("foo\n|---\nbar").render();
        assert!(html.trim().starts_with("<table"));
        let html = md.parse("foo\n:---\nbar").render();
        assert!(html.trim().starts_with("<table"));
    }
}