Skip to main content

marco_core/parser/blocks/
gfm_table_parser.rs

1//! GFM table parser - converts grammar output to AST nodes
2//!
3//! Converts `grammar::blocks::gfm_table::GfmTableBlock` into the structured table
4//! AST representation:
5//! - `NodeKind::Table { alignments }`
6//! - `NodeKind::TableRow { header }`
7//! - `NodeKind::TableCell { header, alignment }`
8//!
9//! Cell contents are parsed with the inline parser so emphasis/links/etc work
10//! inside table cells.
11
12use super::shared::{opt_span, GrammarSpan};
13use crate::grammar::blocks::gfm_table::{split_pipe_row_cells, GfmTableBlock};
14use crate::parser::ast::{Node, NodeKind, TableAlignment};
15use nom::Input;
16
17/// Parse a GFM table block into an AST node.
18///
19/// `full_start..full_end` should cover the entire matched table construct (as
20/// returned by the block-level grammar function) so spans/highlighting can
21/// reference the full table region.
22pub fn parse_gfm_table<'a>(
23    table: GfmTableBlock<'a>,
24    full_start: GrammarSpan<'a>,
25    full_end: GrammarSpan<'a>,
26) -> Node {
27    // `full_end` is the remainder span returned by the grammar parser, so we
28    // must use exclusive range semantics here.
29    let span = crate::parser::shared::opt_span_range(full_start, full_end);
30
31    let header_cells = split_pipe_row_cells(table.header_line);
32    let delimiter_cells = split_pipe_row_cells(table.delimiter_line);
33
34    // Grammar guarantees: non-empty and same length.
35    let alignments: Vec<TableAlignment> = delimiter_cells
36        .iter()
37        .map(|cell| parse_alignment(cell.fragment()))
38        .collect();
39
40    let column_count = alignments.len();
41
42    let mut rows: Vec<Node> = Vec::new();
43
44    // Header row
45    rows.push(parse_table_row(
46        true,
47        table.header_line,
48        header_cells,
49        &alignments,
50        column_count,
51    ));
52
53    // Body rows
54    for body_line in table.body_lines {
55        let body_cells = split_pipe_row_cells(body_line);
56        rows.push(parse_table_row(
57            false,
58            body_line,
59            body_cells,
60            &alignments,
61            column_count,
62        ));
63    }
64
65    Node {
66        kind: NodeKind::Table { alignments },
67        span,
68        children: rows,
69    }
70}
71
72pub(crate) fn parse_table_row<'a>(
73    header: bool,
74    row_line: GrammarSpan<'a>,
75    mut cells: Vec<GrammarSpan<'a>>,
76    alignments: &[TableAlignment],
77    column_count: usize,
78) -> Node {
79    let row_span = opt_span(row_line);
80
81    normalize_cells_to_column_count(&mut cells, row_line, column_count);
82
83    let mut children: Vec<Node> = Vec::with_capacity(column_count);
84    for (col_idx, cell_span) in cells.into_iter().enumerate().take(column_count) {
85        let alignment = alignments
86            .get(col_idx)
87            .copied()
88            .unwrap_or(TableAlignment::None);
89        children.push(parse_table_cell(header, alignment, cell_span));
90    }
91
92    Node {
93        kind: NodeKind::TableRow { header },
94        span: row_span,
95        children,
96    }
97}
98
99fn parse_table_cell<'a>(
100    header: bool,
101    alignment: TableAlignment,
102    cell_span: GrammarSpan<'a>,
103) -> Node {
104    let span = opt_span(cell_span);
105
106    let inline_children = match crate::parser::inlines::parse_inlines_from_span(cell_span) {
107        Ok(children) => children,
108        Err(e) => {
109            log::warn!("Failed to parse inline elements in table cell: {}", e);
110            vec![Node {
111                kind: NodeKind::Text(cell_span.fragment().to_string()),
112                span,
113                children: Vec::new(),
114            }]
115        }
116    };
117
118    Node {
119        kind: NodeKind::TableCell { header, alignment },
120        span,
121        children: inline_children,
122    }
123}
124
125fn normalize_cells_to_column_count<'a>(
126    cells: &mut Vec<GrammarSpan<'a>>,
127    row_line: GrammarSpan<'a>,
128    column_count: usize,
129) {
130    if cells.len() > column_count {
131        cells.truncate(column_count);
132    }
133
134    while cells.len() < column_count {
135        cells.push(empty_span_at_end_of_line(row_line));
136    }
137}
138
139fn empty_span_at_end_of_line<'a>(line: GrammarSpan<'a>) -> GrammarSpan<'a> {
140    let len = line.fragment().len();
141    line.take_from(len).take(0)
142}
143
144pub(crate) fn parse_alignment(cell: &str) -> TableAlignment {
145    let cell = cell.trim_matches([' ', '\t']);
146    let left = cell.starts_with(':');
147    let right = cell.ends_with(':');
148
149    match (left, right) {
150        (true, true) => TableAlignment::Center,
151        (true, false) => TableAlignment::Left,
152        (false, true) => TableAlignment::Right,
153        (false, false) => TableAlignment::None,
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammar::blocks as grammar;

    /// Run the table grammar over `source` and convert the match into an AST node.
    fn build_table(source: &str) -> Node {
        let input = GrammarSpan::new(source);
        let (rest, table) = grammar::gfm_table(input).expect("should parse table");
        parse_gfm_table(table, input, rest)
    }

    #[test]
    fn smoke_test_parse_gfm_table_builds_ast_structure() {
        let table = build_table("| a | b |\n|---|:--:|\n| 1 | 2 |\n");

        assert!(matches!(table.kind, NodeKind::Table { .. }));

        // Exactly one header row followed by one body row.
        assert_eq!(table.children.len(), 2);
        let header_row = &table.children[0];
        let body_row = &table.children[1];

        assert!(matches!(
            header_row.kind,
            NodeKind::TableRow { header: true }
        ));
        assert!(matches!(
            body_row.kind,
            NodeKind::TableRow { header: false }
        ));

        assert_eq!(header_row.children.len(), 2);
        assert_eq!(body_row.children.len(), 2);

        // The delimiter row's alignments must reach the individual cells.
        assert!(matches!(
            header_row.children[0].kind,
            NodeKind::TableCell {
                alignment: TableAlignment::None,
                header: true
            }
        ));
        assert!(matches!(
            header_row.children[1].kind,
            NodeKind::TableCell {
                alignment: TableAlignment::Center,
                header: true
            }
        ));
    }

    #[test]
    fn smoke_test_row_padding_and_truncation() {
        // Second body line is one cell short, third is one cell too long.
        let table = build_table("| a | b |\n|---|---|\n| 1 |\n| 2 | 3 | 4 |\n");

        // Header plus two body rows.
        assert_eq!(table.children.len(), 3);

        // Every row is normalized to the two columns declared by the delimiter row.
        for (row_idx, row) in table.children.iter().enumerate() {
            assert_eq!(row.children.len(), 2, "row {} has wrong cell count", row_idx);
        }
    }
}