Skip to main content

marco_core/grammar/blocks/
marco_headerless_table.rs

1// Extended syntax: "headerless" pipe tables
2//
3// Syntax (extension):
4// - First line is a valid GFM delimiter row (pipes + '-' + optional ':' for alignment)
5// - Followed by 1+ body rows (pipe rows)
6// - No header row is produced; all rows are treated as body rows.
7//
8// This enables tables like:
9// |--------|--------|
10// | Data 1 | Data 2 |
11// | Data 3 | Data 4 |
12//
13// Parsing is intentionally conservative:
14// - Requires `|` on delimiter row so it cannot be confused with thematic breaks.
15// - Requires indentation < 4 (avoid indented code blocks).
16// - Requires at least one body row.
17
18use crate::grammar::blocks::gfm_table::{
19    count_unescaped_pipes, is_valid_delimiter_cell, split_pipe_row_cells,
20};
21use crate::grammar::shared::{count_indentation, Span};
22use nom::character::complete::{line_ending, not_line_ending};
23use nom::IResult;
24
25#[derive(Debug, Clone, PartialEq)]
26/// Parsed extended headerless table spans (delimiter plus body rows).
27pub struct MarcoHeaderlessTableBlock<'a> {
28    /// Delimiter row span.
29    pub delimiter_line: Span<'a>,
30    /// Body row spans.
31    pub body_lines: Vec<Span<'a>>,
32}
33
34/// Parse an extended "headerless" pipe table starting at the current position.
35///
36/// Returns the consumed table (delimiter+rows) as spans that reference the
37/// original input.
38pub fn headerless_table(input: Span<'_>) -> IResult<Span<'_>, MarcoHeaderlessTableBlock<'_>> {
39    // Table blocks can't start with 4+ spaces (would be indented code).
40    if count_indentation(input.fragment()) >= 4 {
41        return Err(nom::Err::Error(nom::error::Error::new(
42            input,
43            nom::error::ErrorKind::Tag,
44        )));
45    }
46
47    // Delimiter line (must be the first line)
48    let (after_delimiter_line, delimiter_line) = not_line_ending(input)?;
49    if delimiter_line.fragment().trim().is_empty() {
50        return Err(nom::Err::Error(nom::error::Error::new(
51            input,
52            nom::error::ErrorKind::Tag,
53        )));
54    }
55
56    // Require at least one unescaped '|' in the delimiter line.
57    if count_unescaped_pipes(delimiter_line.fragment()) == 0 {
58        return Err(nom::Err::Error(nom::error::Error::new(
59            input,
60            nom::error::ErrorKind::Tag,
61        )));
62    }
63
64    // Require at least one '-' somewhere on delimiter line.
65    if !delimiter_line.fragment().contains('-') {
66        return Err(nom::Err::Error(nom::error::Error::new(
67            input,
68            nom::error::ErrorKind::Tag,
69        )));
70    }
71
72    // Validate delimiter row cell syntax.
73    let delimiter_cells = split_pipe_row_cells(delimiter_line);
74    if delimiter_cells.is_empty()
75        || !delimiter_cells
76            .iter()
77            .all(|cell| is_valid_delimiter_cell(cell.fragment()))
78    {
79        return Err(nom::Err::Error(nom::error::Error::new(
80            input,
81            nom::error::ErrorKind::Tag,
82        )));
83    }
84
85    // Require a newline after the delimiter line (need at least one body row).
86    let (mut remaining, _) = line_ending(after_delimiter_line)?;
87
88    // First body line: must exist and must look like a pipe row (but not a delimiter row).
89    let (after_first_body, first_body_line) = not_line_ending(remaining)?;
90
91    if first_body_line.fragment().trim().is_empty() {
92        return Err(nom::Err::Error(nom::error::Error::new(
93            input,
94            nom::error::ErrorKind::Tag,
95        )));
96    }
97
98    if count_indentation(first_body_line.fragment()) >= 4 {
99        return Err(nom::Err::Error(nom::error::Error::new(
100            input,
101            nom::error::ErrorKind::Tag,
102        )));
103    }
104
105    if count_unescaped_pipes(first_body_line.fragment()) == 0 {
106        return Err(nom::Err::Error(nom::error::Error::new(
107            input,
108            nom::error::ErrorKind::Tag,
109        )));
110    }
111
112    // Disambiguation: avoid treating two consecutive delimiter rows as a headerless table.
113    // (This also avoids mis-parsing a hyphen-only header row).
114    let first_body_cells = split_pipe_row_cells(first_body_line);
115    let first_body_is_delimiter_row = !first_body_cells.is_empty()
116        && first_body_cells
117            .iter()
118            .all(|cell| is_valid_delimiter_cell(cell.fragment()))
119        && first_body_line.fragment().contains('-');
120
121    if first_body_is_delimiter_row {
122        return Err(nom::Err::Error(nom::error::Error::new(
123            input,
124            nom::error::ErrorKind::Tag,
125        )));
126    }
127
128    let mut body_lines: Vec<Span<'_>> = vec![first_body_line];
129
130    // Consume newline after first body line if present.
131    if let Ok((rest, _)) = line_ending::<Span, nom::error::Error<Span>>(after_first_body) {
132        remaining = rest;
133    } else {
134        remaining = after_first_body;
135        return Ok((
136            remaining,
137            MarcoHeaderlessTableBlock {
138                delimiter_line,
139                body_lines,
140            },
141        ));
142    }
143
144    // Additional body rows: consecutive non-blank lines containing at least one unescaped '|'.
145    while !remaining.fragment().is_empty() {
146        let (after_line, line) = match not_line_ending::<Span, nom::error::Error<Span>>(remaining) {
147            Ok(v) => v,
148            Err(_) => break,
149        };
150
151        if line.fragment().trim().is_empty() {
152            break;
153        }
154
155        // Stop if the line is indented code.
156        if count_indentation(line.fragment()) >= 4 {
157            break;
158        }
159
160        if count_unescaped_pipes(line.fragment()) == 0 {
161            break;
162        }
163
164        body_lines.push(line);
165
166        // Consume the newline if present.
167        match line_ending::<Span, nom::error::Error<Span>>(after_line) {
168            Ok((rest, _)) => remaining = rest,
169            Err(_) => {
170                remaining = after_line;
171                break;
172            }
173        }
174    }
175
176    Ok((
177        remaining,
178        MarcoHeaderlessTableBlock {
179            delimiter_line,
180            body_lines,
181        },
182    ))
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// A delimiter row followed by two three-column rows is consumed entirely.
    #[test]
    fn smoke_test_headerless_table_parses_basic() {
        let source = "|--------|--------|--------|\n| Data 1 | Data 2 | Data 3 |\n| Data 4 | Data 5 | Data 6 |\n";
        let parsed = headerless_table(Span::new(source));
        let (rest, table) = parsed.expect("should parse headerless table");

        assert!(rest.fragment().is_empty());

        let delimiter_columns = split_pipe_row_cells(table.delimiter_line).len();
        assert_eq!(delimiter_columns, 3);

        assert_eq!(table.body_lines.len(), 2);

        let first_row_columns = split_pipe_row_cells(table.body_lines[0]).len();
        assert_eq!(first_row_columns, 3);
    }

    /// A delimiter row with nothing after it is not a table.
    #[test]
    fn smoke_test_headerless_table_rejects_missing_body_row() {
        let outcome = headerless_table(Span::new("|---|---|\n"));
        assert!(outcome.is_err());
    }

    /// A regular table (header row first) belongs to the GFM parser, not to
    /// this extension.
    #[test]
    fn smoke_test_headerless_table_rejects_regular_gfm_table() {
        let outcome = headerless_table(Span::new("| a | b |\n|---|---|\n| 1 | 2 |\n"));
        assert!(outcome.is_err());
    }
}
211}