Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
9use crate::parser::utils::inline_emission;
10
/// Horizontal alignment assigned to a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Content is aligned to the left edge of the column.
    Left,
    /// Content is aligned to the right edge of the column.
    Right,
    /// Content is centered within the column.
    Center,
    /// No explicit alignment was detected for the column.
    Default,
}
18
19/// Column information extracted from the separator line.
20#[derive(Debug, Clone)]
21pub(crate) struct Column {
22    /// Start position (byte index) in the line
23    start: usize,
24    /// End position (byte index) in the line
25    end: usize,
26    /// Column alignment
27    alignment: Alignment,
28}
29
30/// Try to detect if a line is a table separator line.
31/// Returns Some(column positions) if it's a valid separator.
32pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
33    let trimmed = line.trim_start();
34    // Strip trailing newline if present (CRLF or LF)
35    let (trimmed, newline_str) = strip_newline(trimmed);
36    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
37
38    // Must have leading spaces <= 3 to not be a code block
39    if leading_spaces > 3 {
40        return None;
41    }
42
43    // Simple tables only use dashed separators.
44    if trimmed.contains('*') || trimmed.contains('_') {
45        return None;
46    }
47
48    // Must contain at least one dash
49    if !trimmed.contains('-') {
50        return None;
51    }
52
53    // A separator line consists of dashes and spaces
54    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
55        return None;
56    }
57
58    // Must not be a horizontal rule.
59    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
60    if dash_groups.len() <= 1 {
61        return None;
62    }
63
64    // Extract column positions from dash groups
65    let columns = extract_columns(trimmed, leading_spaces);
66
67    if columns.is_empty() {
68        return None;
69    }
70
71    Some(columns)
72}
73
74/// Extract column positions from a separator line.
75fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
76    let mut columns = Vec::new();
77    let mut in_dashes = false;
78    let mut col_start = 0;
79
80    for (i, ch) in separator.char_indices() {
81        match ch {
82            '-' if !in_dashes => {
83                col_start = i + offset;
84                in_dashes = true;
85            }
86            ' ' if in_dashes => {
87                columns.push(Column {
88                    start: col_start,
89                    end: i + offset,
90                    alignment: Alignment::Default, // Will be determined later
91                });
92                in_dashes = false;
93            }
94            _ => {}
95        }
96    }
97
98    // Handle last column
99    if in_dashes {
100        columns.push(Column {
101            start: col_start,
102            end: separator.len() + offset,
103            alignment: Alignment::Default,
104        });
105    }
106
107    columns
108}
109
/// Recognise a table-caption marker at the start of `line`.
///
/// Accepted markers are `Table:`, `table:` and a bare `:` that is followed
/// by whitespace (so that fenced-div fences like `:::` are not captions).
/// Returns `(prefix_len, rest)` where `prefix_len` counts the indentation
/// plus the marker and `rest` is everything after the marker. Lines indented
/// by more than three columns are never captions.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let content = line.trim_start();
    let indent = line.len() - content.len();

    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = content.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // Bare ':' only counts when whitespace follows it.
    let rest = content.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
138
139/// Check if a line could be the start of a table caption.
140fn is_table_caption_start(line: &str) -> bool {
141    try_parse_caption_prefix(line).is_some()
142}
143
/// Whether `line` (ignoring indentation) starts with exactly one colon,
/// i.e. a `:` that is not immediately followed by another `:`.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
148
149fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
150    let Some((_, rest)) = try_parse_caption_prefix(line) else {
151        return false;
152    };
153    let trimmed = rest.trim_start();
154    trimmed.starts_with("```") || trimmed.starts_with("~~~")
155}
156
/// Whether `line` is a fenced-div fence: at least three leading colons
/// followed by either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    let colons = body.len() - after_colons.len();

    colons >= 3
        && after_colons
            .chars()
            .next()
            .map_or(true, char::is_whitespace)
}
166
167fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
168    if !is_table_caption_start(lines[pos]) {
169        return false;
170    }
171
172    if is_bare_colon_caption_start(lines[pos])
173        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
174    {
175        return false;
176    }
177
178    // Avoid stealing definition-list definitions (":   ...") as table captions.
179    if is_bare_colon_caption_start(lines[pos])
180        && pos > 0
181        && !lines[pos - 1].trim().is_empty()
182        && !line_is_fenced_div_fence(lines[pos - 1])
183    {
184        return false;
185    }
186    true
187}
188
189/// Check if a line could be the start of a grid table.
190/// Grid tables start with a separator line like +---+---+ or +===+===+
191fn is_grid_table_start(line: &str) -> bool {
192    try_parse_grid_separator(line).is_some()
193}
194
195/// Check if a line could be the start of a multiline table.
196/// Multiline tables start with either:
197/// - A full-width dash separator (----)
198/// - A column separator with dashes and spaces (---- ---- ----)
199fn is_multiline_table_start(line: &str) -> bool {
200    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
201}
202
203/// Check if there's a table following a potential caption at this position.
204/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
205pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
206    if caption_pos >= lines.len() {
207        return false;
208    }
209
210    // Caption must start with a caption prefix
211    if !is_valid_caption_start_before_table(lines, caption_pos) {
212        return false;
213    }
214
215    let mut pos = caption_pos + 1;
216
217    // Skip continuation lines of caption (non-blank lines)
218    while pos < lines.len() && !lines[pos].trim().is_empty() {
219        // If we hit a table separator, we found a table
220        if try_parse_table_separator(lines[pos]).is_some() {
221            return true;
222        }
223        pos += 1;
224    }
225
226    // Skip one blank line
227    if pos < lines.len() && lines[pos].trim().is_empty() {
228        pos += 1;
229    }
230
231    // Check for table at next position
232    if pos < lines.len() {
233        let line = lines[pos];
234
235        // Check for grid table start (+---+---+ or +===+===+)
236        if is_grid_table_start(line) {
237            return true;
238        }
239
240        // Check for multiline table start (---- or ---- ---- ----)
241        if is_multiline_table_start(line) {
242            return true;
243        }
244
245        // Could be a separator line (simple/pipe table, headerless)
246        if try_parse_table_separator(line).is_some() {
247            return true;
248        }
249
250        // Or could be a header line followed by separator (simple/pipe table with header)
251        if pos + 1 < lines.len() && !line.trim().is_empty() {
252            let next_line = lines[pos + 1];
253            if try_parse_table_separator(next_line).is_some()
254                || try_parse_pipe_separator(next_line).is_some()
255            {
256                return true;
257            }
258        }
259    }
260
261    false
262}
263
264fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
265    if start >= lines.len() || !is_table_caption_start(lines[start]) {
266        return None;
267    }
268    let mut end = start + 1;
269    while end < lines.len() && !lines[end].trim().is_empty() {
270        end += 1;
271    }
272    Some((start, end))
273}
274
275/// Find caption before table (if any).
276/// Returns (caption_start, caption_end) positions, or None.
277fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
278    if table_start == 0 {
279        return None;
280    }
281
282    // Look backward for a caption
283    // Caption must be immediately before table (with possible blank line between)
284    let mut pos = table_start - 1;
285
286    // Skip one blank line if present
287    if lines[pos].trim().is_empty() {
288        if pos == 0 {
289            return None;
290        }
291        pos -= 1;
292    }
293
294    // Now pos points to the last non-blank line before the table
295    // This could be the last line of a multiline caption, or a single-line caption
296    let caption_end = pos + 1; // End is exclusive
297
298    // If this line is NOT a caption start, it might be a continuation line
299    // Scan backward through non-blank lines to find the caption start
300    if !is_valid_caption_start_before_table(lines, pos) {
301        // Not a caption start - check if there's a caption start above
302        let mut scan_pos = pos;
303        while scan_pos > 0 {
304            scan_pos -= 1;
305            let line = lines[scan_pos];
306
307            // If we hit a blank line, we've gone too far
308            if line.trim().is_empty() {
309                return None;
310            }
311
312            // If we find a caption start, this is the beginning of the multiline caption
313            if is_valid_caption_start_before_table(lines, scan_pos) {
314                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
315                    return None;
316                }
317                if previous_nonblank_looks_like_table(lines, scan_pos) {
318                    return None;
319                }
320                return Some((scan_pos, caption_end));
321            }
322        }
323        // Scanned to beginning without finding caption start
324        None
325    } else {
326        if pos > 0 && !lines[pos - 1].trim().is_empty() {
327            return None;
328        }
329        if previous_nonblank_looks_like_table(lines, pos) {
330            return None;
331        }
332        // This line is a caption start - return the range
333        Some((pos, caption_end))
334    }
335}
336
337fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
338    if pos == 0 {
339        return false;
340    }
341    let mut i = pos;
342    while i > 0 {
343        i -= 1;
344        let line = lines[i].trim();
345        if line.is_empty() {
346            continue;
347        }
348        return line_looks_like_table_syntax(line);
349    }
350    false
351}
352
353fn line_looks_like_table_syntax(line: &str) -> bool {
354    if line.starts_with('|') && line.matches('|').count() >= 2 {
355        return true;
356    }
357    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
358        return true;
359    }
360    try_parse_table_separator(line).is_some()
361        || try_parse_pipe_separator(line).is_some()
362        || try_parse_grid_separator(line).is_some()
363}
364
365/// Find caption after table (if any).
366/// Returns (caption_start, caption_end) positions, or None.
367fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
368    if table_end >= lines.len() {
369        return None;
370    }
371
372    let mut pos = table_end;
373
374    // Skip one blank line if present
375    if pos < lines.len() && lines[pos].trim().is_empty() {
376        pos += 1;
377    }
378
379    if pos >= lines.len() {
380        return None;
381    }
382
383    // Check if this line is a caption
384    if is_table_caption_start(lines[pos]) {
385        let caption_start = pos;
386        // Find end of caption (continues until blank line)
387        let mut caption_end = caption_start + 1;
388        while caption_end < lines.len() && !lines[caption_end].trim().is_empty() {
389            caption_end += 1;
390        }
391        Some((caption_start, caption_end))
392    } else {
393        None
394    }
395}
396
397/// Emit a table caption node.
398fn emit_table_caption(
399    builder: &mut GreenNodeBuilder<'static>,
400    lines: &[&str],
401    start: usize,
402    end: usize,
403    config: &ParserOptions,
404) {
405    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
406
407    for (i, line) in lines[start..end].iter().enumerate() {
408        if i == 0 {
409            // First line - parse and emit prefix separately
410            let trimmed = line.trim_start();
411            let leading_ws_len = line.len() - trimmed.len();
412
413            // Emit leading whitespace if present
414            if leading_ws_len > 0 {
415                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
416            }
417
418            // Check for caption prefix and emit separately
419            // Calculate where the prefix ends (after trimmed content)
420            let prefix_and_rest = if line.ends_with('\n') {
421                &line[leading_ws_len..line.len() - 1] // Exclude newline
422            } else {
423                &line[leading_ws_len..]
424            };
425
426            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
427                (7, "Table: ")
428            } else if prefix_and_rest.starts_with("table: ") {
429                (7, "table: ")
430            } else if prefix_and_rest.starts_with(": ") {
431                (2, ": ")
432            } else if prefix_and_rest.starts_with(':') {
433                (1, ":")
434            } else {
435                (0, "")
436            };
437
438            if prefix_len > 0 {
439                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
440
441                // Emit rest of line after prefix
442                let rest_start = leading_ws_len + prefix_len;
443                if rest_start < line.len() {
444                    // Get the caption text (excluding newline)
445                    let (caption_text, newline_str) = strip_newline(&line[rest_start..]);
446
447                    if !caption_text.is_empty() {
448                        inline_emission::emit_inlines(builder, caption_text, config);
449                    }
450
451                    if !newline_str.is_empty() {
452                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
453                    }
454                }
455            } else {
456                // No recognized prefix, emit whole trimmed line
457                let (text, newline_str) = strip_newline(&line[leading_ws_len..]);
458
459                if !text.is_empty() {
460                    inline_emission::emit_inlines(builder, text, config);
461                }
462
463                if !newline_str.is_empty() {
464                    builder.token(SyntaxKind::NEWLINE.into(), newline_str);
465                }
466            }
467        } else {
468            // Continuation lines - emit with inline parsing
469            let (text, newline_str) = strip_newline(line);
470
471            if !text.is_empty() {
472                inline_emission::emit_inlines(builder, text, config);
473            }
474
475            if !newline_str.is_empty() {
476                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
477            }
478        }
479    }
480
481    builder.finish_node(); // TABLE_CAPTION
482}
483
484/// Emit a table cell with inline content parsing.
485/// This is the core helper for Phase 7.1 table inline parsing migration.
486fn emit_table_cell(
487    builder: &mut GreenNodeBuilder<'static>,
488    cell_text: &str,
489    config: &ParserOptions,
490) {
491    builder.start_node(SyntaxKind::TABLE_CELL.into());
492
493    // Parse inline content within the cell
494    if !cell_text.is_empty() {
495        inline_emission::emit_inlines(builder, cell_text, config);
496    }
497
498    builder.finish_node(); // TABLE_CELL
499}
500
501/// Determine column alignments based on separator and optional header.
502fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
503    for col in columns.iter_mut() {
504        let sep_slice = &separator_line[col.start..col.end];
505
506        if let Some(header) = header_line {
507            // Extract header text for this column
508            let header_text = if col.end <= header.len() {
509                header[col.start..col.end].trim()
510            } else if col.start < header.len() {
511                header[col.start..].trim()
512            } else {
513                ""
514            };
515
516            if header_text.is_empty() {
517                col.alignment = Alignment::Default;
518                continue;
519            }
520
521            // Find where the header text starts and ends within the column
522            let header_in_col = &header[col.start..col.end.min(header.len())];
523            let text_start = header_in_col.len() - header_in_col.trim_start().len();
524            let text_end = header_in_col.trim_end().len() + text_start;
525
526            // Check dash alignment relative to text
527            let dashes_start = 0; // Dashes start at beginning of sep_slice
528            let dashes_end = sep_slice.len();
529
530            let flush_left = dashes_start == text_start;
531            let flush_right = dashes_end == text_end;
532
533            col.alignment = match (flush_left, flush_right) {
534                (true, true) => Alignment::Default,
535                (true, false) => Alignment::Left,
536                (false, true) => Alignment::Right,
537                (false, false) => Alignment::Center,
538            };
539        } else {
540            // Without header, alignment based on first row (we'll handle this later)
541            col.alignment = Alignment::Default;
542        }
543    }
544}
545
546/// Try to parse a simple table starting at the given position.
547/// Returns the number of lines consumed if successful.
548pub(crate) fn try_parse_simple_table(
549    lines: &[&str],
550    start_pos: usize,
551    builder: &mut GreenNodeBuilder<'static>,
552    config: &ParserOptions,
553) -> Option<usize> {
554    log::debug!("try_parse_simple_table at line {}", start_pos + 1);
555
556    if start_pos >= lines.len() {
557        return None;
558    }
559
560    // Look for a separator line
561    let separator_pos = find_separator_line(lines, start_pos)?;
562    log::debug!("  found separator at line {}", separator_pos + 1);
563
564    let separator_line = lines[separator_pos];
565    let mut columns = try_parse_table_separator(separator_line)?;
566
567    // Determine if there's a header (separator not at start)
568    let has_header = separator_pos > start_pos;
569    let header_line = if has_header {
570        Some(lines[separator_pos - 1])
571    } else {
572        None
573    };
574
575    // Determine alignments
576    determine_alignments(&mut columns, separator_line, header_line);
577
578    // Find table end (blank line or end of input)
579    let end_pos = find_table_end(lines, separator_pos + 1);
580
581    // Must have at least one data row (or it's just a separator)
582    let data_rows = end_pos - separator_pos - 1;
583
584    if data_rows == 0 {
585        return None;
586    }
587
588    // Check for caption before table
589    let caption_before = find_caption_before_table(lines, start_pos);
590
591    // Check for caption after table
592    let caption_after = if caption_before.is_some() {
593        None
594    } else {
595        find_caption_after_table(lines, end_pos)
596    };
597
598    // Build the table
599    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
600
601    // Emit caption before if present
602    if let Some((cap_start, cap_end)) = caption_before {
603        emit_table_caption(builder, lines, cap_start, cap_end, config);
604    }
605
606    // Emit header if present
607    if has_header {
608        emit_table_row(
609            builder,
610            lines[separator_pos - 1],
611            &columns,
612            SyntaxKind::TABLE_HEADER,
613            config,
614        );
615    }
616
617    // Emit separator
618    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
619    emit_line_tokens(builder, separator_line);
620    builder.finish_node();
621
622    // Emit data rows
623    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
624        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
625    }
626
627    // Emit caption after if present
628    if let Some((cap_start, cap_end)) = caption_after {
629        // Emit blank line before caption if needed
630        if cap_start > end_pos {
631            builder.start_node(SyntaxKind::BLANK_LINE.into());
632            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
633            builder.finish_node();
634        }
635        emit_table_caption(builder, lines, cap_start, cap_end, config);
636    }
637
638    builder.finish_node(); // SimpleTable
639
640    // Calculate lines consumed (including captions)
641    let table_start = if let Some((cap_start, _)) = caption_before {
642        cap_start
643    } else if has_header {
644        separator_pos - 1
645    } else {
646        separator_pos
647    };
648
649    let table_end = if let Some((_, cap_end)) = caption_after {
650        cap_end
651    } else {
652        end_pos
653    };
654
655    let lines_consumed = table_end - table_start;
656
657    Some(lines_consumed)
658}
659
660/// Find the position of a separator line starting from pos.
661fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
662    log::debug!("  find_separator_line from line {}", start_pos + 1);
663
664    // Check first line
665    log::debug!("    checking first line: {:?}", lines[start_pos]);
666    if try_parse_table_separator(lines[start_pos]).is_some() {
667        log::debug!("    separator found at first line");
668        return Some(start_pos);
669    }
670
671    // Check second line (for table with header)
672    if start_pos + 1 < lines.len()
673        && !lines[start_pos].trim().is_empty()
674        && try_parse_table_separator(lines[start_pos + 1]).is_some()
675    {
676        return Some(start_pos + 1);
677    }
678    None
679}
680
681/// Find where the table ends (first blank line or end of input).
682fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
683    for i in start_pos..lines.len() {
684        if lines[i].trim().is_empty() {
685            return i;
686        }
687        // Check if this could be a closing separator
688        if try_parse_table_separator(lines[i]).is_some() {
689            // Check if next line is blank or end
690            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
691                return i + 1;
692            }
693        }
694    }
695    lines.len()
696}
697
698/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
699/// Uses column boundaries from the separator line to extract cells.
700fn emit_table_row(
701    builder: &mut GreenNodeBuilder<'static>,
702    line: &str,
703    columns: &[Column],
704    row_kind: SyntaxKind,
705    config: &ParserOptions,
706) {
707    builder.start_node(row_kind.into());
708
709    let (line_without_newline, newline_str) = strip_newline(line);
710
711    // Emit leading whitespace if present
712    let trimmed = line_without_newline.trim_start();
713    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
714    if leading_ws_len > 0 {
715        builder.token(
716            SyntaxKind::WHITESPACE.into(),
717            &line_without_newline[..leading_ws_len],
718        );
719    }
720
721    // Track where we are in the line (for losslessness)
722    let mut current_pos = 0;
723
724    // Extract and emit cells based on column boundaries
725    for col in columns.iter() {
726        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
727        let cell_start = if col.start >= leading_ws_len {
728            (col.start - leading_ws_len).min(trimmed.len())
729        } else {
730            0
731        };
732
733        let cell_end = if col.end >= leading_ws_len {
734            (col.end - leading_ws_len).min(trimmed.len())
735        } else {
736            0
737        };
738
739        // Extract cell text from column bounds
740        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
741            &trimmed[cell_start..cell_end]
742        } else if cell_start < trimmed.len() {
743            &trimmed[cell_start..]
744        } else {
745            ""
746        };
747
748        let cell_content = cell_text.trim();
749        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
750
751        // Emit any whitespace from current position to start of cell content
752        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
753        if current_pos < content_abs_pos {
754            builder.token(
755                SyntaxKind::WHITESPACE.into(),
756                &trimmed[current_pos..content_abs_pos],
757            );
758        }
759
760        // Emit cell with inline parsing
761        emit_table_cell(builder, cell_content, config);
762
763        // Update current position to end of cell content
764        current_pos = content_abs_pos + cell_content.len();
765    }
766
767    // Emit any remaining whitespace after last cell
768    if current_pos < trimmed.len() {
769        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
770    }
771
772    // Emit newline if present
773    if !newline_str.is_empty() {
774        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
775    }
776
777    builder.finish_node();
778}
779
780// ============================================================================
781// Pipe Table Parsing
782// ============================================================================
783
784/// Check if a line is a pipe table separator line.
785/// Returns the column alignments if it's a valid separator.
786fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
787    let trimmed = line.trim();
788
789    // Must contain at least one pipe
790    if !trimmed.contains('|') && !trimmed.contains('+') {
791        return None;
792    }
793
794    // Split by pipes (or + for orgtbl variant)
795    let cells: Vec<&str> = if trimmed.contains('+') {
796        // Orgtbl variant: use + as separator in separator line
797        trimmed.split(['|', '+']).collect()
798    } else {
799        trimmed.split('|').collect()
800    };
801
802    let mut alignments = Vec::new();
803
804    for cell in cells {
805        let cell = cell.trim();
806
807        // Skip empty cells (from leading/trailing pipes)
808        if cell.is_empty() {
809            continue;
810        }
811
812        // Must be dashes with optional colons
813        let starts_colon = cell.starts_with(':');
814        let ends_colon = cell.ends_with(':');
815
816        // Remove colons to check if rest is all dashes
817        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
818
819        // Must have at least one dash
820        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
821            return None;
822        }
823
824        // Determine alignment from colon positions
825        let alignment = match (starts_colon, ends_colon) {
826            (true, true) => Alignment::Center,
827            (true, false) => Alignment::Left,
828            (false, true) => Alignment::Right,
829            (false, false) => Alignment::Default,
830        };
831
832        alignments.push(alignment);
833    }
834
835    // Must have at least one column
836    if alignments.is_empty() {
837        None
838    } else {
839        Some(alignments)
840    }
841}
842
/// Split a pipe table row into trimmed cell strings.
///
/// A pipe preceded by a backslash (`\|`) is kept inside the cell (with its
/// backslash) instead of ending it. A pipe in the very first position is
/// dropped, and an empty cell after the final pipe is not reported.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let row = line.trim();

    let mut cells: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut iter = row.chars().enumerate().peekable();

    while let Some((idx, ch)) = iter.next() {
        if ch == '\\' {
            pending.push('\\');
            // Keep an escaped pipe together with its backslash.
            if matches!(iter.peek(), Some(&(_, '|'))) {
                pending.push('|');
                iter.next();
            }
        } else if ch == '|' {
            // The leading pipe opens the row; every other pipe closes a cell.
            if idx != 0 {
                cells.push(pending.trim().to_string());
                pending.clear();
            }
        } else {
            pending.push(ch);
        }
    }

    // Text after the last pipe is a final cell only when non-empty.
    let tail = pending.trim();
    if !tail.is_empty() {
        cells.push(tail.to_string());
    }

    cells
}
890
891/// Emit a pipe table row with inline-parsed cells.
892/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
893fn emit_pipe_table_row(
894    builder: &mut GreenNodeBuilder<'static>,
895    line: &str,
896    row_kind: SyntaxKind,
897    config: &ParserOptions,
898) {
899    builder.start_node(row_kind.into());
900
901    let (line_without_newline, newline_str) = strip_newline(line);
902    let trimmed = line_without_newline.trim();
903
904    // Parse cell boundaries
905    let mut cell_starts = Vec::new();
906    let mut cell_ends = Vec::new();
907    let mut in_escape = false;
908
909    // Find all pipe positions (excluding escaped ones)
910    let mut pipe_positions = Vec::new();
911    for (i, ch) in trimmed.char_indices() {
912        if in_escape {
913            in_escape = false;
914            continue;
915        }
916        if ch == '\\' {
917            in_escape = true;
918            continue;
919        }
920        if ch == '|' {
921            pipe_positions.push(i);
922        }
923    }
924
925    // Determine cell boundaries based on pipe positions
926    if pipe_positions.is_empty() {
927        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
928        cell_starts.push(0);
929        cell_ends.push(trimmed.len());
930    } else {
931        // Check if line starts with pipe
932        let start_pipe = pipe_positions.first() == Some(&0);
933        // Check if line ends with pipe
934        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
935
936        if start_pipe {
937            // Skip first pipe
938            for i in 1..pipe_positions.len() {
939                cell_starts.push(pipe_positions[i - 1] + 1);
940                cell_ends.push(pipe_positions[i]);
941            }
942            // Add last cell if there's no trailing pipe
943            if !end_pipe {
944                cell_starts.push(*pipe_positions.last().unwrap() + 1);
945                cell_ends.push(trimmed.len());
946            }
947        } else {
948            // No leading pipe
949            cell_starts.push(0);
950            cell_ends.push(pipe_positions[0]);
951
952            for i in 1..pipe_positions.len() {
953                cell_starts.push(pipe_positions[i - 1] + 1);
954                cell_ends.push(pipe_positions[i]);
955            }
956
957            // Add last cell if there's no trailing pipe
958            if !end_pipe {
959                cell_starts.push(*pipe_positions.last().unwrap() + 1);
960                cell_ends.push(trimmed.len());
961            }
962        }
963    }
964
965    // Emit leading whitespace if present (before trim)
966    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
967    if leading_ws_len > 0 {
968        builder.token(
969            SyntaxKind::WHITESPACE.into(),
970            &line_without_newline[..leading_ws_len],
971        );
972    }
973
974    // Emit cells with pipes
975    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
976        // Emit pipe before cell (except for first cell if no leading pipe)
977        if *start > 0 {
978            builder.token(SyntaxKind::TEXT.into(), "|");
979        } else if idx == 0 && trimmed.starts_with('|') {
980            // Leading pipe
981            builder.token(SyntaxKind::TEXT.into(), "|");
982        }
983
984        // Get cell content with its whitespace
985        let cell_with_ws = &trimmed[*start..*end];
986        let cell_content = cell_with_ws.trim();
987
988        // Emit leading whitespace within cell
989        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
990        if !cell_leading_ws.is_empty() {
991            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
992        }
993
994        // Emit cell with inline parsing
995        emit_table_cell(builder, cell_content, config);
996
997        // Emit trailing whitespace within cell
998        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
999        if cell_trailing_ws_start < cell_with_ws.len() {
1000            builder.token(
1001                SyntaxKind::WHITESPACE.into(),
1002                &cell_with_ws[cell_trailing_ws_start..],
1003            );
1004        }
1005    }
1006
1007    // Emit trailing pipe if present
1008    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1009        builder.token(SyntaxKind::TEXT.into(), "|");
1010    }
1011
1012    // Emit trailing whitespace after trim (before newline)
1013    let trailing_ws_start = leading_ws_len + trimmed.len();
1014    if trailing_ws_start < line_without_newline.len() {
1015        builder.token(
1016            SyntaxKind::WHITESPACE.into(),
1017            &line_without_newline[trailing_ws_start..],
1018        );
1019    }
1020
1021    // Emit newline
1022    if !newline_str.is_empty() {
1023        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1024    }
1025
1026    builder.finish_node();
1027}
1028
1029/// Try to parse a pipe table starting at the given position.
1030/// Returns the number of lines consumed if successful.
1031pub(crate) fn try_parse_pipe_table(
1032    lines: &[&str],
1033    start_pos: usize,
1034    builder: &mut GreenNodeBuilder<'static>,
1035    config: &ParserOptions,
1036) -> Option<usize> {
1037    if start_pos + 1 >= lines.len() {
1038        return None;
1039    }
1040
1041    // Check if this line is a caption followed by a table
1042    // If so, the actual table starts after the caption and blank line
1043    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1044        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1045        let mut pos = cap_end;
1046        while pos < lines.len() && lines[pos].trim().is_empty() {
1047            pos += 1;
1048        }
1049        (pos, Some((cap_start, cap_end)))
1050    } else {
1051        (start_pos, None)
1052    };
1053
1054    if actual_start + 1 >= lines.len() {
1055        return None;
1056    }
1057
1058    // First line should have pipes (potential header)
1059    let header_line = lines[actual_start];
1060    if !header_line.contains('|') {
1061        return None;
1062    }
1063
1064    // Second line should be separator
1065    let separator_line = lines[actual_start + 1];
1066    let alignments = try_parse_pipe_separator(separator_line)?;
1067
1068    // Parse header cells
1069    let header_cells = parse_pipe_table_row(header_line);
1070
1071    // Number of columns should match (approximately - be lenient)
1072    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1073        // Only fail if very different
1074        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1075            return None;
1076        }
1077    }
1078
1079    // Find table end (first blank line or end of input)
1080    let mut end_pos = actual_start + 2;
1081    while end_pos < lines.len() {
1082        let line = lines[end_pos];
1083        if line.trim().is_empty() {
1084            break;
1085        }
1086        // Row should have pipes
1087        if !line.contains('|') {
1088            break;
1089        }
1090        end_pos += 1;
1091    }
1092
1093    // Must have at least one data row
1094    if end_pos <= actual_start + 2 {
1095        return None;
1096    }
1097
1098    // Check for caption before table (only if we didn't already detect it)
1099    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1100
1101    // Check for caption after table
1102    let caption_after = if caption_before.is_some() {
1103        None
1104    } else {
1105        find_caption_after_table(lines, end_pos)
1106    };
1107
1108    // Build the pipe table
1109    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1110
1111    // Emit caption before if present
1112    if let Some((cap_start, cap_end)) = caption_before {
1113        emit_table_caption(builder, lines, cap_start, cap_end, config);
1114        // Emit blank line between caption and table if present
1115        if cap_end < actual_start {
1116            for line in lines.iter().take(actual_start).skip(cap_end) {
1117                if line.trim().is_empty() {
1118                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1119                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1120                    builder.finish_node();
1121                }
1122            }
1123        }
1124    }
1125
1126    // Emit header row with inline-parsed cells
1127    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1128
1129    // Emit separator
1130    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1131    emit_line_tokens(builder, separator_line);
1132    builder.finish_node();
1133
1134    // Emit data rows with inline-parsed cells
1135    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1136        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1137    }
1138
1139    // Emit caption after if present
1140    if let Some((cap_start, cap_end)) = caption_after {
1141        // Emit blank line before caption if needed
1142        if cap_start > end_pos {
1143            builder.start_node(SyntaxKind::BLANK_LINE.into());
1144            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
1145            builder.finish_node();
1146        }
1147        emit_table_caption(builder, lines, cap_start, cap_end, config);
1148    }
1149
1150    builder.finish_node(); // PipeTable
1151
1152    // Calculate lines consumed
1153    let table_start = caption_before
1154        .map(|(start, _)| start)
1155        .unwrap_or(actual_start);
1156    let table_end = if let Some((_, cap_end)) = caption_after {
1157        cap_end
1158    } else {
1159        end_pos
1160    };
1161
1162    Some(table_end - table_start)
1163}
1164
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Simple tables -----------------------------------------------------

    /// Two or more dash groups form a separator; a single run is a horizontal rule.
    #[test]
    fn test_separator_detection() {
        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
        assert!(try_parse_table_separator("-------").is_none()); // horizontal rule
        assert!(try_parse_table_separator("--- --- ---").is_some()); // table separator
    }

    /// Each dash group yields one column.
    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // header + sep + 2 rows
        assert_eq!(result, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // sep + 2 rows
        assert_eq!(result, Some(3));
    }

    // ---- Captions ----------------------------------------------------------

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(try_parse_caption_prefix(caption).is_some());
        }
        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
        assert!(try_parse_caption_prefix("Not a caption").is_none());
    }

    /// A `: ` definition whose body is a fenced code block stays a definition list.
    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let input = "Term\n: ```\n  code\n  ```\n";
        let tree = crate::parse(input, None);
        let has_kind = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            has_kind(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            has_kind(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !has_kind(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    /// Inside a fenced div, `: caption` lines preceding pipe tables attach to them.
    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
        let tree = crate::parse(input, None);

        let caption_count = tree
            .descendants()
            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
            .count();
        assert_eq!(
            caption_count, 2,
            "expected both captions to attach to tables"
        );

        let has_definition_list = tree
            .descendants()
            .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST);
        assert!(
            !has_definition_list,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(result, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let input = [
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // Parsing starts at the header line; the caption is picked up by looking back.
        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 2, &mut builder, &ParserOptions::default());

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(result, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // header + sep + row + blank + caption
        assert_eq!(result, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // Should consume through end of multi-line caption
        assert_eq!(result, Some(6));
    }

    // ---- Pipe tables -------------------------------------------------------

    #[test]
    fn test_pipe_separator_detection() {
        assert!(try_parse_pipe_separator("|------:|:-----|---------|:------:|").is_some());
        assert!(try_parse_pipe_separator("|---|---|").is_some());
        assert!(try_parse_pipe_separator("-----|-----:").is_some()); // No leading pipe
        assert!(try_parse_pipe_separator("|-----+-------|").is_some()); // Orgtbl variant
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    /// Colon placement in the separator selects the column alignment.
    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(
            aligns,
            vec![
                Alignment::Right,
                Alignment::Left,
                Alignment::Default,
                Alignment::Center,
            ]
        );
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let cells = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(cells, ["Right", "Left", "Center"]);

        // Without leading/trailing pipes
        let cells2 = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(cells2.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let input = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 1, &mut builder, &ParserOptions::default());

        // header + sep + 2 rows
        assert_eq!(result, Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let input = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 1, &mut builder, &ParserOptions::default());

        assert_eq!(result, Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let input = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 1, &mut builder, &ParserOptions::default());

        // header + sep + row + blank + caption
        assert_eq!(result, Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let input = [
            ": (#tab:base) base R quoting",
            "functions",
            "",
            "| C | D |",
            "|---|---|",
            "| 3 | 4 |",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 0, &mut builder, &ParserOptions::default());

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(result, Some(6));
    }
}
1456
1457// ============================================================================
1458// Grid Table Parsing
1459// ============================================================================
1460
/// Recognise a grid table border line such as `+---+-----+` or `+:===:+====+`.
///
/// After removing surrounding whitespace the line must start and end with `+`
/// and be indented by at most three characters. Every non-empty segment between
/// two `+` signs must be a run of only `-` or only `=`, optionally wrapped in
/// alignment colons. Empty segments (`++`) are ignored.
///
/// Returns one [`GridColumn`] per accepted segment, or `None` when the line is
/// not a border or contains no usable segment.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    let unindented = line.trim_start();

    // Four or more leading characters would make this an indented code block.
    if line.len() - unindented.len() > 3 {
        return None;
    }

    let border = unindented.trim_end();
    if !(border.starts_with('+') && border.ends_with('+')) {
        return None;
    }

    // Splitting on `+` leaves an empty piece before the first and after the last
    // corner, so a real border produces at least three pieces.
    let pieces: Vec<&str> = border.split('+').collect();
    if pieces.len() < 3 {
        return None;
    }

    let mut columns = Vec::new();
    for piece in &pieces[1..pieces.len() - 1] {
        if piece.is_empty() {
            continue;
        }

        // Drop alignment colons; what remains is the fill run.
        let fill = piece.trim_matches(':');
        let marker = match fill.chars().next() {
            Some(c @ ('-' | '=')) => c,
            // Colons only, or a fill that starts with some other character.
            _ => return None,
        };
        if fill.chars().any(|c| c != marker) {
            return None;
        }

        columns.push(GridColumn {
            is_header_separator: marker == '=',
            // Width counts the whole segment, alignment colons included.
            width: piece.chars().count(),
        });
    }

    if columns.is_empty() {
        None
    } else {
        Some(columns)
    }
}

/// Column information for grid tables, derived from one segment of a border line.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters between the two enclosing `+` corners.
    width: usize,
}
1535
1536fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1537    let mut end_byte = start_byte;
1538    let mut display_cols = 0usize;
1539
1540    for (offset, ch) in line[start_byte..].char_indices() {
1541        if ch == '|' {
1542            let sep_byte = start_byte + offset;
1543            return (sep_byte, sep_byte + 1);
1544        }
1545        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1546        if display_cols + ch_width > width {
1547            break;
1548        }
1549        display_cols += ch_width;
1550        end_byte = start_byte + offset + ch.len_utf8();
1551        if display_cols >= width {
1552            break;
1553        }
1554    }
1555
1556    // If the width budget is exhausted before seeing a separator (for example
1557    // because of padding/layout drift), advance to the next literal separator
1558    // to keep row slicing aligned and preserve losslessness.
1559    let mut sep_byte = end_byte;
1560    while sep_byte < line.len() {
1561        let mut chars = line[sep_byte..].chars();
1562        let Some(ch) = chars.next() else {
1563            break;
1564        };
1565        if ch == '|' {
1566            return (sep_byte, sep_byte + 1);
1567        }
1568        sep_byte += ch.len_utf8();
1569    }
1570
1571    (end_byte, end_byte)
1572}
1573
/// Decide whether `line` looks like a grid table content row.
///
/// Ignoring surrounding whitespace, a content row starts with `|` and ends with
/// either `|` (ordinary row) or `+` (spanning-style continuation line). Lines
/// indented by more than three characters are rejected.
fn is_grid_content_row(line: &str) -> bool {
    let body = line.trim_start();
    if line.len() - body.len() > 3 {
        return false;
    }

    let body = body.trim_end();
    body.starts_with('|') && matches!(body.chars().next_back(), Some('|' | '+'))
}
1587
1588/// Extract cell contents from a single grid table row line.
1589/// Returns a vector of cell contents (trimmed) based on column boundaries.
1590/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1591fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1592    let (line_content, _) = strip_newline(line);
1593    let line_trimmed = line_content.trim();
1594
1595    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1596        return vec![String::new(); _columns.len()];
1597    }
1598
1599    let mut cells = Vec::with_capacity(_columns.len());
1600    let mut pos_byte = 1; // Skip leading pipe
1601
1602    for col in _columns {
1603        let col_idx = cells.len();
1604        if pos_byte >= line_trimmed.len() {
1605            cells.push(String::new());
1606            continue;
1607        }
1608
1609        let start_byte = pos_byte;
1610        let end_byte = if col_idx + 1 == _columns.len() {
1611            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1612        } else {
1613            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1614            pos_byte = next_start;
1615            end
1616        };
1617        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1618        if col_idx + 1 == _columns.len() {
1619            pos_byte = line_trimmed.len();
1620        }
1621    }
1622
1623    cells
1624}
1625
1626/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1627/// Concatenates cell contents across lines with newlines, then trims.
1628fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1629    if lines.is_empty() {
1630        return vec![String::new(); columns.len()];
1631    }
1632
1633    extract_grid_cells_from_line(lines[0], columns)
1634}
1635
1636/// Emit a grid table row with inline-parsed cells.
1637/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1638/// then continuation lines as raw TEXT for losslessness.
1639fn emit_grid_table_row(
1640    builder: &mut GreenNodeBuilder<'static>,
1641    lines: &[&str],
1642    columns: &[GridColumn],
1643    row_kind: SyntaxKind,
1644    config: &ParserOptions,
1645) {
1646    if lines.is_empty() {
1647        return;
1648    }
1649
1650    // Extract cell contents from the first line.
1651    let cell_contents = extract_grid_cells_multiline(lines, columns);
1652
1653    builder.start_node(row_kind.into());
1654
1655    // Emit first line with TABLE_CELL nodes
1656    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1657    let first_line = lines[0];
1658    let (line_without_newline, newline_str) = strip_newline(first_line);
1659    let trimmed = line_without_newline.trim();
1660    let expected_pipe_count = columns.len().saturating_add(1);
1661    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1662
1663    // Rows that don't contain all expected column separators (spanning-style rows)
1664    // must be emitted verbatim for losslessness.
1665    if actual_pipe_count != expected_pipe_count {
1666        emit_line_tokens(builder, first_line);
1667        for line in lines.iter().skip(1) {
1668            emit_line_tokens(builder, line);
1669        }
1670        builder.finish_node();
1671        return;
1672    }
1673
1674    // Emit leading whitespace
1675    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1676    if leading_ws_len > 0 {
1677        builder.token(
1678            SyntaxKind::WHITESPACE.into(),
1679            &line_without_newline[..leading_ws_len],
1680        );
1681    }
1682
1683    // Emit leading pipe
1684    if trimmed.starts_with('|') {
1685        builder.token(SyntaxKind::TEXT.into(), "|");
1686    }
1687
1688    // Emit each cell based on fixed column widths from separators
1689    let mut pos_byte = 1usize; // after leading pipe
1690    for (idx, cell_content) in cell_contents.iter().enumerate() {
1691        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1692            let start_byte = pos_byte;
1693            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1694                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1695            } else {
1696                let (end, next_start) =
1697                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1698                pos_byte = next_start;
1699                end
1700            };
1701            let slice = &trimmed[start_byte..end_byte];
1702            if idx + 1 == columns.len() {
1703                pos_byte = trimmed.len();
1704            }
1705            slice
1706        } else {
1707            ""
1708        };
1709
1710        // Emit leading whitespace in cell
1711        let cell_trimmed = part.trim();
1712        let ws_start_len = part.len() - part.trim_start().len();
1713        if ws_start_len > 0 {
1714            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1715        }
1716
1717        // Emit TABLE_CELL with inline parsing
1718        emit_table_cell(builder, cell_content, config);
1719
1720        // Emit trailing whitespace in cell
1721        let ws_end_start = ws_start_len + cell_trimmed.len();
1722        if ws_end_start < part.len() {
1723            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1724        }
1725
1726        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1727        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1728            builder.token(SyntaxKind::TEXT.into(), "|");
1729        }
1730    }
1731
1732    // Emit trailing whitespace before newline
1733    let trailing_ws_start = leading_ws_len + trimmed.len();
1734    if trailing_ws_start < line_without_newline.len() {
1735        builder.token(
1736            SyntaxKind::WHITESPACE.into(),
1737            &line_without_newline[trailing_ws_start..],
1738        );
1739    }
1740
1741    // Emit newline
1742    if !newline_str.is_empty() {
1743        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1744    }
1745
1746    // Emit continuation lines as TEXT for losslessness
1747    for line in lines.iter().skip(1) {
1748        emit_line_tokens(builder, line);
1749    }
1750
1751    builder.finish_node();
1752}
1753
1754/// Try to parse a grid table starting at the given position.
1755/// Returns the number of lines consumed if successful.
1756pub(crate) fn try_parse_grid_table(
1757    lines: &[&str],
1758    start_pos: usize,
1759    builder: &mut GreenNodeBuilder<'static>,
1760    config: &ParserOptions,
1761) -> Option<usize> {
1762    if start_pos >= lines.len() {
1763        return None;
1764    }
1765
1766    // Check if this line is a caption followed by a table
1767    // If so, the actual table starts after the caption and blank line
1768    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1769        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1770        let mut pos = cap_end;
1771        while pos < lines.len() && lines[pos].trim().is_empty() {
1772            pos += 1;
1773        }
1774        (pos, Some((cap_start, cap_end)))
1775    } else {
1776        (start_pos, None)
1777    };
1778
1779    if actual_start >= lines.len() {
1780        return None;
1781    }
1782
1783    // First line must be a grid separator
1784    let first_line = lines[actual_start];
1785    let _columns = try_parse_grid_separator(first_line)?;
1786
1787    // Track table structure
1788    let mut end_pos = actual_start + 1;
1789    let mut found_header_sep = false;
1790    let mut in_footer = false;
1791
1792    // Scan table lines
1793    while end_pos < lines.len() {
1794        let line = lines[end_pos];
1795
1796        // Check for blank line (table ends)
1797        if line.trim().is_empty() {
1798            break;
1799        }
1800
1801        // Check for separator line
1802        if let Some(sep_cols) = try_parse_grid_separator(line) {
1803            // Check if this is a header separator (=)
1804            if sep_cols.iter().any(|c| c.is_header_separator) {
1805                if !found_header_sep {
1806                    found_header_sep = true;
1807                } else if !in_footer {
1808                    // Second = separator starts footer
1809                    in_footer = true;
1810                }
1811            }
1812            end_pos += 1;
1813            continue;
1814        }
1815
1816        // Check for content row
1817        if is_grid_content_row(line) {
1818            end_pos += 1;
1819            continue;
1820        }
1821
1822        // Not a valid grid table line - table ends
1823        break;
1824    }
1825
1826    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1827    // Or just top + content rows that end with a separator
1828    if end_pos <= actual_start + 1 {
1829        return None;
1830    }
1831
1832    // Last consumed line should be a separator for a well-formed table
1833    // But we'll be lenient and accept tables ending with content rows
1834
1835    // Check for caption before table (only if we didn't already detected it)
1836    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1837
1838    // Check for caption after table
1839    let caption_after = if caption_before.is_some() {
1840        None
1841    } else {
1842        find_caption_after_table(lines, end_pos)
1843    };
1844
1845    // Build the grid table
1846    builder.start_node(SyntaxKind::GRID_TABLE.into());
1847
1848    // Emit caption before if present
1849    if let Some((cap_start, cap_end)) = caption_before {
1850        emit_table_caption(builder, lines, cap_start, cap_end, config);
1851        // Emit blank line between caption and table if present
1852        if cap_end < actual_start {
1853            for line in lines.iter().take(actual_start).skip(cap_end) {
1854                if line.trim().is_empty() {
1855                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1856                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1857                    builder.finish_node();
1858                }
1859            }
1860        }
1861    }
1862
1863    // Track whether we've passed the header separator
1864    let mut past_header_sep = false;
1865    let mut in_footer_section = false;
1866    let mut current_row_lines: Vec<&str> = Vec::new();
1867    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1868
1869    // Emit table rows - accumulate multi-line cells
1870    for line in lines.iter().take(end_pos).skip(actual_start) {
1871        if let Some(sep_cols) = try_parse_grid_separator(line) {
1872            // Separator line - emit any accumulated row first
1873            if !current_row_lines.is_empty() {
1874                emit_grid_table_row(
1875                    builder,
1876                    &current_row_lines,
1877                    &sep_cols,
1878                    current_row_kind,
1879                    config,
1880                );
1881                current_row_lines.clear();
1882            }
1883
1884            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1885
1886            if is_header_sep {
1887                if !past_header_sep {
1888                    // This is the header/body separator
1889                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1890                    emit_line_tokens(builder, line);
1891                    builder.finish_node();
1892                    past_header_sep = true;
1893                } else {
1894                    // Footer separator
1895                    if !in_footer_section {
1896                        in_footer_section = true;
1897                    }
1898                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1899                    emit_line_tokens(builder, line);
1900                    builder.finish_node();
1901                }
1902            } else {
1903                // Regular separator (row boundary)
1904                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1905                emit_line_tokens(builder, line);
1906                builder.finish_node();
1907            }
1908        } else if is_grid_content_row(line) {
1909            // Content row - accumulate for multi-line cells
1910            current_row_kind = if !past_header_sep && found_header_sep {
1911                SyntaxKind::TABLE_HEADER
1912            } else if in_footer_section {
1913                SyntaxKind::TABLE_FOOTER
1914            } else {
1915                SyntaxKind::TABLE_ROW
1916            };
1917
1918            current_row_lines.push(line);
1919        }
1920    }
1921
1922    // Emit any remaining accumulated row
1923    if !current_row_lines.is_empty() {
1924        // Use first separator's columns for cell boundaries
1925        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
1926            emit_grid_table_row(
1927                builder,
1928                &current_row_lines,
1929                &sep_cols,
1930                current_row_kind,
1931                config,
1932            );
1933        }
1934    }
1935
1936    // Emit caption after if present
1937    if let Some((cap_start, cap_end)) = caption_after {
1938        if cap_start > end_pos {
1939            builder.start_node(SyntaxKind::BLANK_LINE.into());
1940            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
1941            builder.finish_node();
1942        }
1943        emit_table_caption(builder, lines, cap_start, cap_end, config);
1944    }
1945
1946    builder.finish_node(); // GRID_TABLE
1947
1948    // Calculate lines consumed
1949    let table_start = caption_before
1950        .map(|(start, _)| start)
1951        .unwrap_or(actual_start);
1952    let table_end = if let Some((_, cap_end)) = caption_after {
1953        cap_end
1954    } else {
1955        end_pos
1956    };
1957
1958    Some(table_end - table_start)
1959}
1960
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Runs the grid-table parser on `input` starting at line `start` and
    /// returns the number of lines it reports as consumed.
    fn consumed_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let accepted = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in accepted {
            assert!(try_parse_grid_separator(line).is_some(), "{line:?}");
        }

        let rejected = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in rejected {
            assert!(try_parse_grid_separator(line).is_none(), "{line:?}");
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header.iter().all(|c| c.is_header_separator));

        let plain = try_parse_grid_separator("+---+---+").unwrap();
        assert!(!plain.iter().any(|c| c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for row in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(row), "{row:?}");
        }

        // A separator is not content, and neither is a line without pipes.
        for not_row in ["+---+---+", "no pipes here"] {
            assert!(!is_grid_content_row(not_row), "{not_row:?}");
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts at the top border; the count still covers
        // caption + blank + table.
        assert_eq!(consumed_lines(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(&input, 0), Some(7));
    }
}
2115
2116// ============================================================================
2117// Multiline Table Parsing
2118// ============================================================================
2119
2120/// Check if a line is a multiline table separator (continuous dashes).
2121/// Multiline table separators span the full width and are all dashes.
2122/// Returns Some(columns) if valid, None otherwise.
2123fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2124    let trimmed = line.trim_start();
2125    let leading_spaces = line.len() - trimmed.len();
2126
2127    // Must have leading spaces <= 3 to not be a code block
2128    if leading_spaces > 3 {
2129        return None;
2130    }
2131
2132    let trimmed = trimmed.trim_end();
2133
2134    // Must be all dashes (continuous line of dashes)
2135    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2136        return None;
2137    }
2138
2139    // Must have at least 3 dashes
2140    if trimmed.len() < 3 {
2141        return None;
2142    }
2143
2144    // This is a full-width separator - columns will be determined by column separator lines
2145    Some(vec![Column {
2146        start: leading_spaces,
2147        end: leading_spaces + trimmed.len(),
2148        alignment: Alignment::Default,
2149    }])
2150}
2151
2152/// Check if a line is a column separator line for multiline tables.
2153/// Column separators have dashes with spaces between them to define columns.
2154fn is_column_separator(line: &str) -> bool {
2155    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2156}
2157
2158fn is_headerless_single_row_without_blank(
2159    lines: &[&str],
2160    row_start: usize,
2161    row_end: usize,
2162    columns: &[Column],
2163) -> bool {
2164    if row_start >= row_end {
2165        return false;
2166    }
2167
2168    if row_end - row_start == 1 {
2169        return false;
2170    }
2171
2172    let Some(last_col) = columns.last() else {
2173        return false;
2174    };
2175
2176    for line in lines.iter().take(row_end).skip(row_start + 1) {
2177        let (content, _) = strip_newline(line);
2178        let prefix_end = last_col.start.min(content.len());
2179        if !content[..prefix_end].trim().is_empty() {
2180            return false;
2181        }
2182    }
2183
2184    true
2185}
2186
2187/// Try to parse a multiline table starting at the given position.
2188/// Returns the number of lines consumed if successful.
2189pub(crate) fn try_parse_multiline_table(
2190    lines: &[&str],
2191    start_pos: usize,
2192    builder: &mut GreenNodeBuilder<'static>,
2193    config: &ParserOptions,
2194) -> Option<usize> {
2195    if start_pos >= lines.len() {
2196        return None;
2197    }
2198
2199    let first_line = lines[start_pos];
2200
2201    // First line can be either:
2202    // 1. A full-width dash separator (for tables with headers)
2203    // 2. A column separator (for headerless tables)
2204    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2205    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2206    let headerless_columns = if is_column_sep_start {
2207        try_parse_table_separator(first_line)
2208    } else {
2209        None
2210    };
2211
2212    if !is_full_width_start && !is_column_sep_start {
2213        return None;
2214    }
2215
2216    // Look ahead to find the structure
2217    let mut pos = start_pos + 1;
2218    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2219    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2220    let mut has_header = false;
2221    let mut found_blank_line = false;
2222    let mut found_closing_sep = false;
2223    let mut content_line_count = 0usize;
2224
2225    // Scan for header section and column separator
2226    while pos < lines.len() {
2227        let line = lines[pos];
2228
2229        // Check for column separator (defines columns) - only if we started with full-width
2230        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2231            found_column_sep = true;
2232            column_sep_pos = pos;
2233            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2234            pos += 1;
2235            continue;
2236        }
2237
2238        // Check for blank line (row separator in body)
2239        if line.trim().is_empty() {
2240            found_blank_line = true;
2241            pos += 1;
2242            // Check if next line is a valid closing separator for this table shape.
2243            if pos < lines.len() {
2244                let next = lines[pos];
2245                let is_valid_closer = if is_full_width_start {
2246                    try_parse_multiline_separator(next).is_some()
2247                } else {
2248                    is_column_separator(next)
2249                };
2250                if is_valid_closer {
2251                    found_closing_sep = true;
2252                    pos += 1; // Include the closing separator
2253                    break;
2254                }
2255            }
2256            continue;
2257        }
2258
2259        // Check for closing full-width dashes (only for full-width-start tables).
2260        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2261            found_closing_sep = true;
2262            pos += 1;
2263            break;
2264        }
2265
2266        // Check for closing column separator (for headerless tables)
2267        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2268            found_closing_sep = true;
2269            pos += 1;
2270            break;
2271        }
2272
2273        // Content row
2274        content_line_count += 1;
2275        pos += 1;
2276    }
2277
2278    // Must have found a column separator to be a valid multiline table
2279    if !found_column_sep {
2280        return None;
2281    }
2282
2283    // Must have had at least one blank line between rows (distinguishes from simple tables)
2284    if !found_blank_line {
2285        if !is_column_sep_start {
2286            return None;
2287        }
2288        let columns = headerless_columns.as_deref()?;
2289        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2290            return None;
2291        }
2292    }
2293
2294    // Must have a closing separator
2295    if !found_closing_sep {
2296        return None;
2297    }
2298
2299    // Must have consumed more than just the opening separator
2300    if pos <= start_pos + 2 {
2301        return None;
2302    }
2303
2304    let end_pos = pos;
2305
2306    // Extract column boundaries from the separator line
2307    let columns =
2308        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2309
2310    // Check for caption before table
2311    let caption_before = find_caption_before_table(lines, start_pos);
2312
2313    // Check for caption after table
2314    let caption_after = if caption_before.is_some() {
2315        None
2316    } else {
2317        find_caption_after_table(lines, end_pos)
2318    };
2319
2320    // Build the multiline table
2321    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2322
2323    // Emit caption before if present
2324    if let Some((cap_start, cap_end)) = caption_before {
2325        emit_table_caption(builder, lines, cap_start, cap_end, config);
2326
2327        // Emit blank line between caption and table if present
2328        if cap_end < start_pos {
2329            for line in lines.iter().take(start_pos).skip(cap_end) {
2330                if line.trim().is_empty() {
2331                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2332                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2333                    builder.finish_node();
2334                }
2335            }
2336        }
2337    }
2338
2339    // Emit opening separator
2340    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2341    emit_line_tokens(builder, lines[start_pos]);
2342    builder.finish_node();
2343
2344    // Track state for emitting
2345    let mut in_header = has_header;
2346    let mut current_row_lines: Vec<&str> = Vec::new();
2347
2348    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2349        // Column separator (header/body divider)
2350        if i == column_sep_pos {
2351            // Emit any accumulated header lines
2352            if !current_row_lines.is_empty() {
2353                emit_multiline_table_row(
2354                    builder,
2355                    &current_row_lines,
2356                    &columns,
2357                    SyntaxKind::TABLE_HEADER,
2358                    config,
2359                );
2360                current_row_lines.clear();
2361            }
2362
2363            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2364            emit_line_tokens(builder, line);
2365            builder.finish_node();
2366            in_header = false;
2367            continue;
2368        }
2369
2370        // Closing separator (full-width or column separator at end)
2371        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2372            // Emit any accumulated row lines
2373            if !current_row_lines.is_empty() {
2374                let kind = if in_header {
2375                    SyntaxKind::TABLE_HEADER
2376                } else {
2377                    SyntaxKind::TABLE_ROW
2378                };
2379                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2380                current_row_lines.clear();
2381            }
2382
2383            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2384            emit_line_tokens(builder, line);
2385            builder.finish_node();
2386            continue;
2387        }
2388
2389        // Blank line (row separator)
2390        if line.trim().is_empty() {
2391            // Emit accumulated row
2392            if !current_row_lines.is_empty() {
2393                let kind = if in_header {
2394                    SyntaxKind::TABLE_HEADER
2395                } else {
2396                    SyntaxKind::TABLE_ROW
2397                };
2398                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2399                current_row_lines.clear();
2400            }
2401
2402            builder.start_node(SyntaxKind::BLANK_LINE.into());
2403            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
2404            builder.finish_node();
2405            continue;
2406        }
2407
2408        // Content line - accumulate for current row
2409        current_row_lines.push(line);
2410    }
2411
2412    // Emit any remaining accumulated lines
2413    if !current_row_lines.is_empty() {
2414        let kind = if in_header {
2415            SyntaxKind::TABLE_HEADER
2416        } else {
2417            SyntaxKind::TABLE_ROW
2418        };
2419        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2420    }
2421
2422    // Emit caption after if present
2423    if let Some((cap_start, cap_end)) = caption_after {
2424        if cap_start > end_pos {
2425            builder.start_node(SyntaxKind::BLANK_LINE.into());
2426            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
2427            builder.finish_node();
2428        }
2429        emit_table_caption(builder, lines, cap_start, cap_end, config);
2430    }
2431
2432    builder.finish_node(); // MultilineTable
2433
2434    // Calculate lines consumed
2435    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2436    let table_end = if let Some((_, cap_end)) = caption_after {
2437        cap_end
2438    } else {
2439        end_pos
2440    };
2441
2442    Some(table_end - table_start)
2443}
2444
2445/// Extract cell contents from first line only (for CST emission).
2446/// Multi-line content will be in continuation TEXT tokens.
2447fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2448    let (line_content, _) = strip_newline(line);
2449    let mut cells = Vec::new();
2450
2451    for column in columns.iter() {
2452        // Extract FULL text for this column (including whitespace)
2453        let cell_text = if column.end <= line_content.len() {
2454            &line_content[column.start..column.end]
2455        } else if column.start < line_content.len() {
2456            &line_content[column.start..]
2457        } else {
2458            ""
2459        };
2460
2461        cells.push(cell_text.to_string());
2462    }
2463
2464    cells
2465}
2466
2467/// Emit a multiline table row with inline parsing (Phase 7.1).
2468fn emit_multiline_table_row(
2469    builder: &mut GreenNodeBuilder<'static>,
2470    lines: &[&str],
2471    columns: &[Column],
2472    kind: SyntaxKind,
2473    config: &ParserOptions,
2474) {
2475    if lines.is_empty() {
2476        return;
2477    }
2478
2479    // Extract cell contents from first line only (for CST losslessness)
2480    let first_line = lines[0];
2481    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2482
2483    builder.start_node(kind.into());
2484
2485    // Emit first line with TABLE_CELL nodes
2486    let (trimmed, newline_str) = strip_newline(first_line);
2487    let mut current_pos = 0;
2488
2489    for (col_idx, column) in columns.iter().enumerate() {
2490        let cell_text = &cell_contents[col_idx];
2491        let cell_start = column.start.min(trimmed.len());
2492        let cell_end = column.end.min(trimmed.len());
2493
2494        // Emit whitespace before cell
2495        if current_pos < cell_start {
2496            builder.token(
2497                SyntaxKind::WHITESPACE.into(),
2498                &trimmed[current_pos..cell_start],
2499            );
2500        }
2501
2502        // Emit cell with inline parsing (first line content only)
2503        emit_table_cell(builder, cell_text, config);
2504
2505        current_pos = cell_end;
2506    }
2507
2508    // Emit trailing whitespace
2509    if current_pos < trimmed.len() {
2510        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2511    }
2512
2513    // Emit newline
2514    if !newline_str.is_empty() {
2515        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2516    }
2517
2518    // Emit continuation lines as TEXT to preserve exact line structure
2519    for line in lines.iter().skip(1) {
2520        emit_line_tokens(builder, line);
2521    }
2522
2523    builder.finish_node();
2524}
2525
2526#[cfg(test)]
2527mod multiline_table_tests {
2528    use super::*;
2529    use crate::syntax::SyntaxNode;
2530
2531    #[test]
2532    fn test_multiline_separator_detection() {
2533        assert!(
2534            try_parse_multiline_separator(
2535                "-------------------------------------------------------------"
2536            )
2537            .is_some()
2538        );
2539        assert!(try_parse_multiline_separator("---").is_some());
2540        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces
2541        assert!(try_parse_multiline_separator("--").is_none()); // too short
2542        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
2543        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
2544    }
2545
2546    #[test]
2547    fn test_basic_multiline_table() {
2548        let input = vec![
2549            "-------------------------------------------------------------",
2550            " Centered   Default           Right Left",
2551            "  Header    Aligned         Aligned Aligned",
2552            "----------- ------- --------------- -------------------------",
2553            "   First    row                12.0 Example of a row that",
2554            "                                    spans multiple lines.",
2555            "",
2556            "  Second    row                 5.0 Here's another one.",
2557            "-------------------------------------------------------------",
2558            "",
2559        ];
2560
2561        let mut builder = GreenNodeBuilder::new();
2562        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2563
2564        assert!(result.is_some());
2565        assert_eq!(result.unwrap(), 9);
2566    }
2567
2568    #[test]
2569    fn test_multiline_table_headerless() {
2570        let input = vec![
2571            "----------- ------- --------------- -------------------------",
2572            "   First    row                12.0 Example of a row that",
2573            "                                    spans multiple lines.",
2574            "",
2575            "  Second    row                 5.0 Here's another one.",
2576            "----------- ------- --------------- -------------------------",
2577            "",
2578        ];
2579
2580        let mut builder = GreenNodeBuilder::new();
2581        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2582
2583        assert!(result.is_some());
2584        assert_eq!(result.unwrap(), 6);
2585    }
2586
2587    #[test]
2588    fn test_multiline_table_headerless_single_line_is_not_multiline() {
2589        let input = vec![
2590            "-------     ------ ----------   -------",
2591            "     12     12        12             12",
2592            "-------     ------ ----------   -------",
2593            "",
2594            "Not part of table.",
2595            "",
2596        ];
2597
2598        let mut builder = GreenNodeBuilder::new();
2599        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2600
2601        assert!(result.is_none());
2602    }
2603
2604    #[test]
2605    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
2606        let input = vec![
2607            "----------  ---------  -----------  ---------------------------",
2608            "   First    row               12.0  Example of a row that spans",
2609            "                                    multiple lines.",
2610            "----------  ---------  -----------  ---------------------------",
2611            "",
2612        ];
2613
2614        let mut builder = GreenNodeBuilder::new();
2615        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2616
2617        assert!(result.is_some());
2618        assert_eq!(result.unwrap(), 4);
2619    }
2620
2621    #[test]
2622    fn test_multiline_table_with_caption() {
2623        let input = vec![
2624            "-------------------------------------------------------------",
2625            " Col1       Col2",
2626            "----------- -------",
2627            "   A        B",
2628            "",
2629            "-------------------------------------------------------------",
2630            "",
2631            "Table: Here's the caption.",
2632            "",
2633        ];
2634
2635        let mut builder = GreenNodeBuilder::new();
2636        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2637
2638        assert!(result.is_some());
2639        // table (6 lines) + blank + caption
2640        assert_eq!(result.unwrap(), 8);
2641    }
2642
2643    #[test]
2644    fn test_multiline_table_single_row() {
2645        let input = vec![
2646            "---------------------------------------------",
2647            " Header1    Header2",
2648            "----------- -----------",
2649            "   Data     More data",
2650            "",
2651            "---------------------------------------------",
2652            "",
2653        ];
2654
2655        let mut builder = GreenNodeBuilder::new();
2656        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2657
2658        assert!(result.is_some());
2659        assert_eq!(result.unwrap(), 6);
2660    }
2661
2662    #[test]
2663    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
2664        let input = vec![
2665            "- - - - -",
2666            "Third section with underscores.",
2667            "",
2668            "_____",
2669            "",
2670            "> Quote before rule",
2671            ">",
2672            "> ***",
2673            ">",
2674            "> Quote after rule",
2675            "",
2676            "Final paragraph.",
2677            "",
2678            "Here's a horizontal rule:",
2679            "",
2680            "---",
2681            "Text directly after the horizontal rule.",
2682            "",
2683        ];
2684
2685        let mut builder = GreenNodeBuilder::new();
2686        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2687
2688        assert!(result.is_none());
2689    }
2690
2691    #[test]
2692    fn test_not_multiline_table() {
2693        // Simple table should not be parsed as multiline
2694        let input = vec![
2695            "  Right     Left     Center     Default",
2696            "-------     ------ ----------   -------",
2697            "     12     12        12            12",
2698            "",
2699        ];
2700
2701        let mut builder = GreenNodeBuilder::new();
2702        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2703
2704        // Should not parse because first line isn't a full-width separator
2705        assert!(result.is_none());
2706    }
2707
2708    // Phase 7.1: Unit tests for emit_table_cell() helper
2709    #[test]
2710    fn test_emit_table_cell_plain_text() {
2711        let mut builder = GreenNodeBuilder::new();
2712        emit_table_cell(&mut builder, "Cell", &ParserOptions::default());
2713        let green = builder.finish();
2714        let node = SyntaxNode::new_root(green);
2715
2716        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2717        assert_eq!(node.text(), "Cell");
2718
2719        // Should have TEXT child
2720        let children: Vec<_> = node.children_with_tokens().collect();
2721        assert_eq!(children.len(), 1);
2722        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
2723    }
2724
2725    #[test]
2726    fn test_emit_table_cell_with_emphasis() {
2727        let mut builder = GreenNodeBuilder::new();
2728        emit_table_cell(&mut builder, "*italic*", &ParserOptions::default());
2729        let green = builder.finish();
2730        let node = SyntaxNode::new_root(green);
2731
2732        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2733        assert_eq!(node.text(), "*italic*");
2734
2735        // Should have EMPHASIS child
2736        let children: Vec<_> = node.children().collect();
2737        assert_eq!(children.len(), 1);
2738        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
2739    }
2740
2741    #[test]
2742    fn test_emit_table_cell_with_code() {
2743        let mut builder = GreenNodeBuilder::new();
2744        emit_table_cell(&mut builder, "`code`", &ParserOptions::default());
2745        let green = builder.finish();
2746        let node = SyntaxNode::new_root(green);
2747
2748        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2749        assert_eq!(node.text(), "`code`");
2750
2751        // Should have CODE_SPAN child
2752        let children: Vec<_> = node.children().collect();
2753        assert_eq!(children.len(), 1);
2754        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
2755    }
2756
2757    #[test]
2758    fn test_emit_table_cell_with_link() {
2759        let mut builder = GreenNodeBuilder::new();
2760        emit_table_cell(&mut builder, "[text](url)", &ParserOptions::default());
2761        let green = builder.finish();
2762        let node = SyntaxNode::new_root(green);
2763
2764        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2765        assert_eq!(node.text(), "[text](url)");
2766
2767        // Should have LINK child
2768        let children: Vec<_> = node.children().collect();
2769        assert_eq!(children.len(), 1);
2770        assert_eq!(children[0].kind(), SyntaxKind::LINK);
2771    }
2772
2773    #[test]
2774    fn test_emit_table_cell_with_strong() {
2775        let mut builder = GreenNodeBuilder::new();
2776        emit_table_cell(&mut builder, "**bold**", &ParserOptions::default());
2777        let green = builder.finish();
2778        let node = SyntaxNode::new_root(green);
2779
2780        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2781        assert_eq!(node.text(), "**bold**");
2782
2783        // Should have STRONG child
2784        let children: Vec<_> = node.children().collect();
2785        assert_eq!(children.len(), 1);
2786        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
2787    }
2788
2789    #[test]
2790    fn test_emit_table_cell_mixed_inline() {
2791        let mut builder = GreenNodeBuilder::new();
2792        emit_table_cell(
2793            &mut builder,
2794            "Text **bold** and `code`",
2795            &ParserOptions::default(),
2796        );
2797        let green = builder.finish();
2798        let node = SyntaxNode::new_root(green);
2799
2800        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2801        assert_eq!(node.text(), "Text **bold** and `code`");
2802
2803        // Should have multiple children: TEXT, STRONG, TEXT, CODE_SPAN
2804        let children: Vec<_> = node.children_with_tokens().collect();
2805        assert!(children.len() >= 4);
2806
2807        // Check some expected types
2808        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
2809        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
2810    }
2811
2812    #[test]
2813    fn test_emit_table_cell_empty() {
2814        let mut builder = GreenNodeBuilder::new();
2815        emit_table_cell(&mut builder, "", &ParserOptions::default());
2816        let green = builder.finish();
2817        let node = SyntaxNode::new_root(green);
2818
2819        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2820        assert_eq!(node.text(), "");
2821
2822        // Empty cell should have no children
2823        let children: Vec<_> = node.children_with_tokens().collect();
2824        assert_eq!(children.len(), 0);
2825    }
2826
2827    #[test]
2828    fn test_emit_table_cell_escaped_pipe() {
2829        let mut builder = GreenNodeBuilder::new();
2830        emit_table_cell(&mut builder, r"A \| B", &ParserOptions::default());
2831        let green = builder.finish();
2832        let node = SyntaxNode::new_root(green);
2833
2834        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2835        // The escaped pipe should be preserved
2836        assert_eq!(node.text(), r"A \| B");
2837    }
2838}