markdown_that/plugins/extra/
tables.rs

1//! GFM tables
2//!
3//! <https://github.github.com/gfm/#tables-extension->
4use crate::common::sourcemap::SourcePos;
5use crate::parser::block::{BlockRule, BlockState};
6use crate::parser::extset::RenderExt;
7use crate::parser::inline::InlineRoot;
8use crate::plugins::cmark::block::heading::HeadingScanner;
9use crate::plugins::cmark::block::list::ListScanner;
10use crate::{MarkdownThat, Node, NodeValue, Renderer};
11
/// AST node for a whole GFM table.
///
/// The block rule in this file gives it a [`TableHead`] child and, when at
/// least one body row was parsed, a [`TableBody`] child.
#[derive(Debug)]
pub struct Table {
    /// Per-column alignment parsed from the delimiter row; copied into
    /// [`TableRenderContext`] while the table is being rendered.
    pub alignments: Vec<ColumnAlignment>,
}
16
17impl NodeValue for Table {
18    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
19        let old_context = fmt.ext().remove::<TableRenderContext>();
20        fmt.ext().insert(TableRenderContext {
21            head: false,
22            alignments: self.alignments.clone(),
23            index: 0,
24        });
25
26        fmt.cr();
27        fmt.open("table", &node.attrs);
28        fmt.cr();
29        fmt.contents(&node.children);
30        fmt.cr();
31        fmt.close("table");
32        fmt.cr();
33
34        old_context.map(|ctx| fmt.ext().insert(ctx));
35    }
36}
37
/// Render-time state shared between the table node renderers in this file.
///
/// It is stored in the renderer's extension set (reached through `fmt.ext()`).
#[derive(Debug, Default)]
pub struct TableRenderContext {
    /// `true` while the children of a [`TableHead`] are being rendered;
    /// [`TableCell`] uses it to choose between `<th>` and `<td>`.
    pub head: bool,
    /// Index of the next cell in the current row: reset to `0` by
    /// [`TableRow`], incremented by every [`TableCell`].
    pub index: usize,
    /// Column alignments of the table currently being rendered, looked up by
    /// `index`.
    pub alignments: Vec<ColumnAlignment>,
}
44
// Marker impl: no methods are overridden. The context is stored and fetched
// through `fmt.ext()` by the table node renderers in this file.
impl RenderExt for TableRenderContext {}
46
/// AST node for the header section of a table, rendered as `<thead>`.
#[derive(Debug)]
pub struct TableHead;
49
50impl NodeValue for TableHead {
51    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
52        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
53        ctx.head = true;
54
55        fmt.cr();
56        fmt.open("thead", &node.attrs);
57        fmt.cr();
58        fmt.contents(&node.children);
59        fmt.cr();
60        fmt.close("thead");
61        fmt.cr();
62
63        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
64        ctx.head = false;
65    }
66}
67
/// AST node for the body section of a table, rendered as `<tbody>`.
///
/// The block rule in this file also uses it as the temporary parent node
/// while body rows are being scanned.
#[derive(Debug)]
pub struct TableBody;
70
71impl NodeValue for TableBody {
72    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
73        fmt.cr();
74        fmt.open("tbody", &node.attrs);
75        fmt.cr();
76        fmt.contents(&node.children);
77        fmt.cr();
78        fmt.close("tbody");
79        fmt.cr();
80    }
81}
82
/// AST node for a single table row (header or body), rendered as `<tr>`.
#[derive(Debug)]
pub struct TableRow;
85
86impl NodeValue for TableRow {
87    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
88        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
89        ctx.index = 0;
90
91        fmt.cr();
92        fmt.open("tr", &node.attrs);
93        fmt.cr();
94        fmt.contents(&node.children);
95        fmt.cr();
96        fmt.close("tr");
97        fmt.cr();
98    }
99}
100
/// AST node for a single table cell, rendered as `<th>` inside a
/// [`TableHead`] and as `<td>` otherwise.
#[derive(Debug)]
pub struct TableCell;
103
104impl NodeValue for TableCell {
105    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
106        let ctx = fmt.ext().get_or_insert_default::<TableRenderContext>();
107        let tag = if ctx.head { "th" } else { "td" };
108
109        let mut attrs = node.attrs.clone();
110
111        match ctx.alignments.get(ctx.index).copied().unwrap_or_default() {
112            ColumnAlignment::None => (),
113            ColumnAlignment::Left => attrs.push(("style", "text-align:left".to_owned())),
114            ColumnAlignment::Right => attrs.push(("style", "text-align:right".to_owned())),
115            ColumnAlignment::Center => attrs.push(("style", "text-align:center".to_owned())),
116        }
117
118        ctx.index += 1;
119
120        fmt.open(tag, &attrs);
121        fmt.contents(&node.children);
122        fmt.close(tag);
123        fmt.cr();
124    }
125}
126
/// Enables GFM tables on the given parser.
///
/// Registers [`TableScanner`] as a block rule, ordered before both
/// `ListScanner` and `HeadingScanner`.
pub fn add(md: &mut MarkdownThat) {
    md.block
        .add_rule::<TableScanner>()
        .before::<ListScanner>()
        .before::<HeadingScanner>();
}
133
/// Block rule that recognises GFM tables; registered by [`add`].
#[doc(hidden)]
pub struct TableScanner;
136
/// Content of one cell as produced by `TableScanner::scan_row`.
#[derive(Debug)]
struct RowContent {
    /// Cell text with leading/trailing blanks removed and each `\|`
    /// collapsed to `|`.
    str: String,
    /// Pairs of `(byte offset in str, byte offset in the scanned line)`.
    /// The first pair marks where the cell's text starts in the line; one
    /// more pair is added for every escaped pipe.
    srcmap: Vec<(usize, usize)>,
}
142
/// Alignment of a table column, as declared by its delimiter-row cell.
#[derive(Debug, Clone, Copy)]
pub enum ColumnAlignment {
    /// No colon in the delimiter cell (`---`); no `style` attribute is emitted.
    None,
    /// Leading colon only (`:---`); rendered as `text-align:left`.
    Left,
    /// Trailing colon only (`---:`); rendered as `text-align:right`.
    Right,
    /// Colons on both sides (`:---:`); rendered as `text-align:center`.
    Center,
}
150
151impl Default for ColumnAlignment {
152    fn default() -> Self {
153        Self::None
154    }
155}
156
157impl TableScanner {
158    fn scan_row(line: &str) -> Vec<RowContent> {
159        let mut result = Vec::new();
160        let mut str = String::new();
161        let mut srcmap = vec![(0, 0)];
162        let mut is_escaped = false;
163        let mut is_leading = true;
164
165        for (pos, ch) in line.char_indices() {
166            match ch {
167                ' ' | '\t' if is_leading => {
168                    srcmap[0].1 += 1;
169                }
170                '|' => {
171                    is_leading = false;
172                    if is_escaped {
173                        str.push_str(&line[srcmap.last().unwrap().1..pos - 1]);
174                        srcmap.push((str.len(), pos));
175                    } else {
176                        str.push_str(&line[srcmap.last().unwrap().1..pos]);
177                        result.push(RowContent {
178                            str: std::mem::take(&mut str),
179                            srcmap: std::mem::take(&mut srcmap),
180                        });
181                        srcmap = vec![(0, pos + 1)];
182                        is_escaped = false;
183                        is_leading = true;
184                    }
185                }
186                '\\' => {
187                    is_leading = false;
188                    is_escaped = true;
189                }
190                _ => {
191                    is_leading = false;
192                    is_escaped = false;
193                }
194            }
195        }
196
197        str.push_str(&line[srcmap.last().unwrap().1..]);
198        result.push(RowContent { str, srcmap });
199
200        // trim trailing spaces
201        for content in result.iter_mut() {
202            while content.str.ends_with([' ', '\t']) {
203                content.str.pop();
204            }
205        }
206
207        // remove last cell if empty
208        if let Some(RowContent { str, srcmap: _ }) = result.last() {
209            if str.is_empty() {
210                result.pop();
211            }
212        }
213
214        // remove first cell if empty
215        if let Some(RowContent { str, srcmap: _ }) = result.first() {
216            if str.is_empty() {
217                result.remove(0);
218            }
219        }
220
221        result
222    }
223
224    fn scan_alignment_row(line: &str) -> Option<Vec<ColumnAlignment>> {
225        // quick check second line, only allow :-| and spaces
226        // (this is for performance only)
227        let mut has_delimiter = false;
228        for ch in line.chars() {
229            match ch {
230                '|' | ':' => has_delimiter = true,
231                '-' | ' ' | '\t' => (),
232                _ => return None,
233            }
234        }
235        if !has_delimiter {
236            return None;
237        }
238
239        // if first character is '-', then second character must not be a space
240        // (due to parsing ambiguity with list)
241        if line.starts_with("- ") {
242            return None;
243        }
244
245        let mut result = Vec::new();
246
247        for RowContent { str, srcmap: _ } in Self::scan_row(line) {
248            let mut alignment: u8 = 0;
249            let mut cell = str.as_str();
250
251            if cell.starts_with(':') {
252                alignment |= 1;
253                cell = &cell[1..];
254            }
255
256            if cell.ends_with(':') {
257                alignment |= 2;
258                cell = &cell[..cell.len() - 1];
259            }
260
261            // only allow '-----' in the remainder
262            if cell.is_empty() || cell.contains(|c| c != '-') {
263                return None;
264            }
265
266            result.push(match alignment {
267                0 => ColumnAlignment::None,
268                1 => ColumnAlignment::Left,
269                2 => ColumnAlignment::Right,
270                3 => ColumnAlignment::Center,
271                _ => unreachable!(),
272            });
273        }
274
275        Some(result)
276    }
277
278    fn scan_header(state: &BlockState) -> Option<(Vec<RowContent>, Vec<ColumnAlignment>)> {
279        // should have at least two lines
280        if state.line + 2 > state.line_max {
281            return None;
282        }
283
284        if state.line_indent(state.line) >= state.md.max_indent {
285            return None;
286        }
287
288        let next_line = state.line + 1;
289        if state.line_indent(next_line) < 0 {
290            return None;
291        }
292
293        if state.line_indent(next_line) >= state.md.max_indent {
294            return None;
295        }
296
297        let alignments = Self::scan_alignment_row(state.get_line(next_line))?;
298        let header_row = Self::scan_row(state.get_line(state.line));
299
300        // header row must match the delimiter row in the number of cells
301        if header_row.len() != alignments.len() {
302            return None;
303        }
304
305        // table without any columns is not a table, see markdown-it#724
306        if header_row.is_empty() {
307            return None;
308        }
309
310        Some((header_row, alignments))
311    }
312}
313
314impl BlockRule for TableScanner {
315    fn check(state: &mut BlockState) -> Option<()> {
316        if state.node.is::<TableBody>() {
317            return None;
318        }
319
320        Self::scan_header(state).map(|_| ())
321    }
322
323    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
324        let (header_row, alignments) = Self::scan_header(state)?;
325        let table_cell_count = header_row.len();
326        let mut table_node = Node::new(Table { alignments });
327
328        let mut thead_node = Node::new(TableHead);
329        thead_node.srcmap = state.get_map(state.line, state.line + 1);
330
331        let mut row_node = Node::new(TableRow);
332        row_node.srcmap = state.get_map(state.line, state.line);
333
334        fn add_cell(row_node: &mut Node, cell: String, srcmap: Vec<(usize, usize)>) {
335            let mut cell_node = Node::new(TableCell);
336            let (start, _) = row_node.srcmap.unwrap().get_byte_offsets();
337            cell_node.srcmap = Some(SourcePos::new(
338                start + srcmap.first().unwrap().1,
339                start + srcmap.last().unwrap().1 + cell.len() - srcmap.last().unwrap().0,
340            ));
341            if !cell.is_empty() {
342                let mapping = srcmap
343                    .into_iter()
344                    .map(|(dstpos, srcpos)| (dstpos, srcpos + start))
345                    .collect();
346                cell_node
347                    .children
348                    .push(Node::new(InlineRoot::new(cell, mapping)));
349            }
350            row_node.children.push(cell_node);
351        }
352
353        for RowContent { str: cell, srcmap } in header_row {
354            add_cell(&mut row_node, cell, srcmap);
355        }
356
357        thead_node.children.push(row_node);
358        table_node.children.push(thead_node);
359
360        let tbody_node = Node::new(TableBody);
361        let old_node = std::mem::replace(&mut state.node, tbody_node);
362
363        //
364        // Iterate table rows
365        //
366
367        let start_line = state.line;
368        state.line += 2;
369
370        while state.line < state.line_max {
371            //
372            // Try to check if table is terminated or continued.
373            //
374            if state.line_indent(state.line) < 0 {
375                break;
376            }
377
378            if state.line_indent(state.line) >= state.md.max_indent {
379                break;
380            }
381
382            // stop if the line is empty
383            if state.is_empty(state.line) {
384                break;
385            }
386
387            // fail if terminating block found
388            if state.test_rules_at_line() {
389                break;
390            }
391
392            let mut row_node = Node::new(TableRow);
393            row_node.srcmap = state.get_map(state.line, state.line);
394            let line = state.get_line(state.line);
395
396            let mut body_row = Self::scan_row(line);
397            let mut end_of_line = RowContent {
398                str: String::new(),
399                srcmap: vec![(0, line.len())],
400            };
401
402            for index in 0..table_cell_count {
403                let RowContent { str: cell, srcmap } =
404                    body_row.get_mut(index).unwrap_or(&mut end_of_line);
405                add_cell(&mut row_node, cell.clone(), srcmap.clone());
406            }
407
408            state.node.children.push(row_node);
409            state.line += 1;
410        }
411
412        let mut tbody_node = std::mem::replace(&mut state.node, old_node);
413
414        if !tbody_node.children.is_empty() {
415            tbody_node.srcmap = state.get_map(start_line + 2, state.line - 1);
416            table_node.children.push(tbody_node);
417        }
418
419        let line_count = state.line - start_line;
420        state.line = start_line;
421        Some((table_node, line_count))
422    }
423}
424
// Unit tests for the row splitter plus one end-to-end check of the
// delimiter-row detection.
#[cfg(test)]
mod tests {
    use super::TableScanner;

    // Unescaped pipes separate cells; an empty line has no cells.
    #[test]
    fn should_split_cells() {
        assert_eq!(TableScanner::scan_row("").len(), 0);
        assert_eq!(TableScanner::scan_row("a").len(), 1);
        assert_eq!(TableScanner::scan_row("a | b").len(), 2);
        assert_eq!(TableScanner::scan_row("a | b | c").len(), 3);
    }

    // Only one empty cell is dropped from each end of the row.
    #[test]
    fn should_ignore_leading_trailing_empty_cells() {
        assert_eq!(TableScanner::scan_row("foo | bar").len(), 2);
        assert_eq!(TableScanner::scan_row("foo | bar |").len(), 2);
        assert_eq!(TableScanner::scan_row("| foo | bar").len(), 2);
        assert_eq!(TableScanner::scan_row("| foo | bar |").len(), 2);
        assert_eq!(TableScanner::scan_row("| | foo | bar | |").len(), 4);
        assert_eq!(TableScanner::scan_row("|").len(), 0);
        assert_eq!(TableScanner::scan_row("||").len(), 1);
    }

    // Spaces and tabs around the cell text are not part of the cell.
    #[test]
    fn should_trim_cell_content() {
        assert_eq!(TableScanner::scan_row("|foo|")[0].str, "foo");
        assert_eq!(TableScanner::scan_row("| foo |")[0].str, "foo");
        assert_eq!(TableScanner::scan_row("|\tfoo\t|")[0].str, "foo");
        assert_eq!(TableScanner::scan_row("| \t foo \t |")[0].str, "foo");
    }

    // A pipe directly preceded by a backslash is literal and that one
    // backslash is removed; other backslashes are kept as-is.
    #[test]
    fn should_process_backslash_escapes() {
        assert_eq!(
            TableScanner::scan_row(r#"| foo\bar |"#)[0].str,
            r#"foo\bar"#
        );
        assert_eq!(
            TableScanner::scan_row(r#"| foo\|bar |"#)[0].str,
            r#"foo|bar"#
        );
        assert_eq!(
            TableScanner::scan_row(r#"| foo\\|bar |"#)[0].str,
            r#"foo\|bar"#
        );
        assert_eq!(
            TableScanner::scan_row(r#"| foo\\\|bar |"#)[0].str,
            r#"foo\\|bar"#
        );
        assert_eq!(
            TableScanner::scan_row(r#"| foo\\\\|bar |"#)[0].str,
            r#"foo\\\|bar"#
        );
    }

    // The source map points at the first non-blank byte of each cell.
    #[test]
    fn should_trim_cell_content_srcmaps() {
        let row = TableScanner::scan_row("| foo | \tbar\t |");
        assert_eq!(row[0].str, "foo");
        assert_eq!(row[0].srcmap, vec![(0, 2)]);
        assert_eq!(row[1].str, "bar");
        assert_eq!(row[1].srcmap, vec![(0, 9)]);
    }

    // Every escaped pipe adds a source-map entry at the pipe's position.
    #[test]
    fn should_process_backslash_escapes_srcmaps() {
        let row = TableScanner::scan_row(r#"|  foo\\|bar\\\|baz\  |"#);
        assert_eq!(row[0].str, r#"foo\|bar\\|baz\"#);
        assert_eq!(row[0].srcmap, vec![(0, 3), (4, 8), (10, 15)]);
    }

    // A delimiter row made of dashes only does not start a table; it needs
    // at least one `|` or `:`.
    #[test]
    fn require_pipe_or_colon_in_align_row() {
        let md = &mut crate::MarkdownThat::new();
        crate::plugins::extra::tables::add(md);
        let html = md.parse("foo\n---\nbar").render();
        assert_eq!(html.trim(), "foo\n---\nbar");
        let html = md.parse("|foo\n---\nbar").render();
        assert_eq!(html.trim(), "|foo\n---\nbar");
        let html = md.parse("foo\n|---\nbar").render();
        assert!(html.trim().starts_with("<table"));
        let html = md.parse("foo\n:---\nbar").render();
        assert!(html.trim().starts_with("<table"));
    }
}