Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
9use crate::parser::utils::inline_emission;
10
/// Horizontal alignment assigned to a table column.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Alignment {
    /// Cell content is aligned to the left edge of the column.
    Left,
    /// Cell content is aligned to the right edge of the column.
    Right,
    /// Cell content is centered within the column.
    Center,
    /// No explicit alignment could be derived for the column.
    Default,
}
18
19/// Column information extracted from the separator line.
20#[derive(Debug, Clone)]
21pub(crate) struct Column {
22    /// Start position (byte index) in the line
23    start: usize,
24    /// End position (byte index) in the line
25    end: usize,
26    /// Column alignment
27    alignment: Alignment,
28}
29
30/// Try to detect if a line is a table separator line.
31/// Returns Some(column positions) if it's a valid separator.
32pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
33    let trimmed = line.trim_start();
34    // Strip trailing newline if present (CRLF or LF)
35    let (trimmed, newline_str) = strip_newline(trimmed);
36    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
37
38    // Must have leading spaces <= 3 to not be a code block
39    if leading_spaces > 3 {
40        return None;
41    }
42
43    // Simple tables only use dashed separators.
44    if trimmed.contains('*') || trimmed.contains('_') {
45        return None;
46    }
47
48    // Must contain at least one dash
49    if !trimmed.contains('-') {
50        return None;
51    }
52
53    // A separator line consists of dashes and spaces
54    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
55        return None;
56    }
57
58    // Must not be a horizontal rule.
59    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
60    if dash_groups.len() <= 1 {
61        return None;
62    }
63
64    // Extract column positions from dash groups
65    let columns = extract_columns(trimmed, leading_spaces);
66
67    if columns.is_empty() {
68        return None;
69    }
70
71    Some(columns)
72}
73
74/// Extract column positions from a separator line.
75fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
76    let mut columns = Vec::new();
77    let mut in_dashes = false;
78    let mut col_start = 0;
79
80    for (i, ch) in separator.char_indices() {
81        match ch {
82            '-' if !in_dashes => {
83                col_start = i + offset;
84                in_dashes = true;
85            }
86            ' ' if in_dashes => {
87                columns.push(Column {
88                    start: col_start,
89                    end: i + offset,
90                    alignment: Alignment::Default, // Will be determined later
91                });
92                in_dashes = false;
93            }
94            _ => {}
95        }
96    }
97
98    // Handle last column
99    if in_dashes {
100        columns.push(Column {
101            start: col_start,
102            end: separator.len() + offset,
103            alignment: Alignment::Default,
104        });
105    }
106
107    columns
108}
109
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Column boundaries are taken from ASCII separator lines, where character
/// and byte offsets coincide. Header and data rows may contain multibyte
/// characters, so the offset has to be translated before slicing. Offsets at
/// or past the end of the line map to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
120
/// Recognise a table-caption marker at the start of `line`.
///
/// Accepted markers are `Table:`, `table:` and a bare `:` that is followed by
/// whitespace. On success returns the byte length of indentation plus marker
/// together with the text that follows the marker. Lines indented by more
/// than three bytes of whitespace are never captions.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation belongs to an indented code block.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare colon only counts when whitespace follows it (Pandoc style);
    // this keeps fenced-div fences such as ":::" from being read as captions.
    body.strip_prefix(':')
        .filter(|rest| rest.starts_with(char::is_whitespace))
        .map(|rest| (indent + 1, rest))
}
149
150/// Check if a line could be the start of a table caption.
151fn is_table_caption_start(line: &str) -> bool {
152    try_parse_caption_prefix(line).is_some()
153}
154
/// Whether `line`, ignoring indentation, starts with exactly one colon
/// (i.e. a `:` that is not immediately followed by another `:`).
fn is_bare_colon_caption_start(line: &str) -> bool {
    let body = line.trim_start();
    // Anything starting with ":::" also starts with "::", so one test suffices.
    body.starts_with(':') && !body.starts_with("::")
}
159
160fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
161    let Some((_, rest)) = try_parse_caption_prefix(line) else {
162        return false;
163    };
164    let trimmed = rest.trim_start();
165    trimmed.starts_with("```") || trimmed.starts_with("~~~")
166}
167
/// Whether `line` is a fenced-div fence: at least three colons (after any
/// indentation) that are followed by nothing or by whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    // ':' is a single byte, so the length difference is the colon count.
    let colon_count = body.len() - after_colons.len();

    colon_count >= 3
        && (after_colons.is_empty() || after_colons.starts_with(char::is_whitespace))
}
177
178fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
179    if !is_table_caption_start(lines[pos]) {
180        return false;
181    }
182
183    if is_bare_colon_caption_start(lines[pos])
184        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
185    {
186        return false;
187    }
188
189    // Avoid stealing definition-list definitions (":   ...") as table captions.
190    if is_bare_colon_caption_start(lines[pos])
191        && pos > 0
192        && !lines[pos - 1].trim().is_empty()
193        && !line_is_fenced_div_fence(lines[pos - 1])
194    {
195        return false;
196    }
197    true
198}
199
200/// Check if a line could be the start of a grid table.
201/// Grid tables start with a separator line like +---+---+ or +===+===+
202fn is_grid_table_start(line: &str) -> bool {
203    try_parse_grid_separator(line).is_some()
204}
205
206/// Check if a line could be the start of a multiline table.
207/// Multiline tables start with either:
208/// - A full-width dash separator (----)
209/// - A column separator with dashes and spaces (---- ---- ----)
210fn is_multiline_table_start(line: &str) -> bool {
211    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
212}
213
214/// Check if there's a table following a potential caption at this position.
215/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
216pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
217    if caption_pos >= lines.len() {
218        return false;
219    }
220
221    // Caption must start with a caption prefix
222    if !is_valid_caption_start_before_table(lines, caption_pos) {
223        return false;
224    }
225
226    let mut pos = caption_pos + 1;
227
228    // Skip continuation lines of caption (non-blank lines)
229    while pos < lines.len() && !lines[pos].trim().is_empty() {
230        // If we hit a table separator, we found a table
231        if try_parse_table_separator(lines[pos]).is_some() {
232            return true;
233        }
234        pos += 1;
235    }
236
237    // Skip one blank line
238    if pos < lines.len() && lines[pos].trim().is_empty() {
239        pos += 1;
240    }
241
242    // Check for table at next position
243    if pos < lines.len() {
244        let line = lines[pos];
245
246        // Check for grid table start (+---+---+ or +===+===+)
247        if is_grid_table_start(line) {
248            return true;
249        }
250
251        // Check for multiline table start (---- or ---- ---- ----)
252        if is_multiline_table_start(line) {
253            return true;
254        }
255
256        // Could be a separator line (simple/pipe table, headerless)
257        if try_parse_table_separator(line).is_some() {
258            return true;
259        }
260
261        // Or could be a header line followed by separator (simple/pipe table with header)
262        if pos + 1 < lines.len() && !line.trim().is_empty() {
263            let next_line = lines[pos + 1];
264            if try_parse_table_separator(next_line).is_some()
265                || try_parse_pipe_separator(next_line).is_some()
266            {
267                return true;
268            }
269        }
270    }
271
272    false
273}
274
275fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
276    if start >= lines.len() || !is_table_caption_start(lines[start]) {
277        return None;
278    }
279    let mut end = start + 1;
280    while end < lines.len() && !lines[end].trim().is_empty() {
281        end += 1;
282    }
283    Some((start, end))
284}
285
286/// Find caption before table (if any).
287/// Returns (caption_start, caption_end) positions, or None.
288fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
289    if table_start == 0 {
290        return None;
291    }
292
293    // Look backward for a caption
294    // Caption must be immediately before table (with possible blank line between)
295    let mut pos = table_start - 1;
296
297    // Skip one blank line if present
298    if lines[pos].trim().is_empty() {
299        if pos == 0 {
300            return None;
301        }
302        pos -= 1;
303    }
304
305    // Now pos points to the last non-blank line before the table
306    // This could be the last line of a multiline caption, or a single-line caption
307    let caption_end = pos + 1; // End is exclusive
308
309    // If this line is NOT a caption start, it might be a continuation line
310    // Scan backward through non-blank lines to find the caption start
311    if !is_valid_caption_start_before_table(lines, pos) {
312        // Not a caption start - check if there's a caption start above
313        let mut scan_pos = pos;
314        while scan_pos > 0 {
315            scan_pos -= 1;
316            let line = lines[scan_pos];
317
318            // If we hit a blank line, we've gone too far
319            if line.trim().is_empty() {
320                return None;
321            }
322
323            // If we find a caption start, this is the beginning of the multiline caption
324            if is_valid_caption_start_before_table(lines, scan_pos) {
325                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
326                    return None;
327                }
328                if previous_nonblank_looks_like_table(lines, scan_pos) {
329                    return None;
330                }
331                return Some((scan_pos, caption_end));
332            }
333        }
334        // Scanned to beginning without finding caption start
335        None
336    } else {
337        if pos > 0 && !lines[pos - 1].trim().is_empty() {
338            return None;
339        }
340        if previous_nonblank_looks_like_table(lines, pos) {
341            return None;
342        }
343        // This line is a caption start - return the range
344        Some((pos, caption_end))
345    }
346}
347
348fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
349    if pos == 0 {
350        return false;
351    }
352    let mut i = pos;
353    while i > 0 {
354        i -= 1;
355        let line = lines[i].trim();
356        if line.is_empty() {
357            continue;
358        }
359        return line_looks_like_table_syntax(line);
360    }
361    false
362}
363
364fn line_looks_like_table_syntax(line: &str) -> bool {
365    if line.starts_with('|') && line.matches('|').count() >= 2 {
366        return true;
367    }
368    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
369        return true;
370    }
371    try_parse_table_separator(line).is_some()
372        || try_parse_pipe_separator(line).is_some()
373        || try_parse_grid_separator(line).is_some()
374}
375
376/// Find caption after table (if any).
377/// Returns (caption_start, caption_end) positions, or None.
378fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
379    if table_end >= lines.len() {
380        return None;
381    }
382
383    let mut pos = table_end;
384
385    // Skip one blank line if present
386    if pos < lines.len() && lines[pos].trim().is_empty() {
387        pos += 1;
388    }
389
390    if pos >= lines.len() {
391        return None;
392    }
393
394    // Check if this line is a caption
395    if is_table_caption_start(lines[pos]) {
396        let caption_start = pos;
397        // Find end of caption (continues until blank line)
398        let mut caption_end = caption_start + 1;
399        while caption_end < lines.len() && !lines[caption_end].trim().is_empty() {
400            caption_end += 1;
401        }
402        Some((caption_start, caption_end))
403    } else {
404        None
405    }
406}
407
408/// Emit a table caption node.
409fn emit_table_caption(
410    builder: &mut GreenNodeBuilder<'static>,
411    lines: &[&str],
412    start: usize,
413    end: usize,
414    config: &ParserOptions,
415) {
416    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
417
418    for (i, line) in lines[start..end].iter().enumerate() {
419        if i == 0 {
420            // First line - parse and emit prefix separately
421            let trimmed = line.trim_start();
422            let leading_ws_len = line.len() - trimmed.len();
423
424            // Emit leading whitespace if present
425            if leading_ws_len > 0 {
426                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
427            }
428
429            // Check for caption prefix and emit separately
430            // Calculate where the prefix ends (after trimmed content)
431            let prefix_and_rest = if line.ends_with('\n') {
432                &line[leading_ws_len..line.len() - 1] // Exclude newline
433            } else {
434                &line[leading_ws_len..]
435            };
436
437            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
438                (7, "Table: ")
439            } else if prefix_and_rest.starts_with("table: ") {
440                (7, "table: ")
441            } else if prefix_and_rest.starts_with(": ") {
442                (2, ": ")
443            } else if prefix_and_rest.starts_with(':') {
444                (1, ":")
445            } else {
446                (0, "")
447            };
448
449            if prefix_len > 0 {
450                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
451
452                // Emit rest of line after prefix
453                let rest_start = leading_ws_len + prefix_len;
454                if rest_start < line.len() {
455                    // Get the caption text (excluding newline)
456                    let (caption_text, newline_str) = strip_newline(&line[rest_start..]);
457
458                    if !caption_text.is_empty() {
459                        inline_emission::emit_inlines(builder, caption_text, config);
460                    }
461
462                    if !newline_str.is_empty() {
463                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
464                    }
465                }
466            } else {
467                // No recognized prefix, emit whole trimmed line
468                let (text, newline_str) = strip_newline(&line[leading_ws_len..]);
469
470                if !text.is_empty() {
471                    inline_emission::emit_inlines(builder, text, config);
472                }
473
474                if !newline_str.is_empty() {
475                    builder.token(SyntaxKind::NEWLINE.into(), newline_str);
476                }
477            }
478        } else {
479            // Continuation lines - emit with inline parsing
480            let (text, newline_str) = strip_newline(line);
481
482            if !text.is_empty() {
483                inline_emission::emit_inlines(builder, text, config);
484            }
485
486            if !newline_str.is_empty() {
487                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
488            }
489        }
490    }
491
492    builder.finish_node(); // TABLE_CAPTION
493}
494
495/// Emit a table cell with inline content parsing.
496/// This is the core helper for Phase 7.1 table inline parsing migration.
497fn emit_table_cell(
498    builder: &mut GreenNodeBuilder<'static>,
499    cell_text: &str,
500    config: &ParserOptions,
501) {
502    builder.start_node(SyntaxKind::TABLE_CELL.into());
503
504    // Parse inline content within the cell
505    if !cell_text.is_empty() {
506        inline_emission::emit_inlines(builder, cell_text, config);
507    }
508
509    builder.finish_node(); // TABLE_CELL
510}
511
512/// Determine column alignments based on separator and optional header.
513fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
514    for col in columns.iter_mut() {
515        let sep_slice = &separator_line[col.start..col.end];
516
517        if let Some(header) = header_line {
518            let header_start = column_offset_to_byte_index(header, col.start);
519            let header_end = column_offset_to_byte_index(header, col.end);
520
521            // Extract header text for this column
522            let header_text = if header_start < header_end {
523                header[header_start..header_end].trim()
524            } else if header_start < header.len() {
525                header[header_start..].trim()
526            } else {
527                ""
528            };
529
530            if header_text.is_empty() {
531                col.alignment = Alignment::Default;
532                continue;
533            }
534
535            // Find where the header text starts and ends within the column
536            let header_in_col = &header[header_start..header_end];
537            let text_start = header_in_col.len() - header_in_col.trim_start().len();
538            let text_end = header_in_col.trim_end().len() + text_start;
539
540            // Check dash alignment relative to text
541            let dashes_start = 0; // Dashes start at beginning of sep_slice
542            let dashes_end = sep_slice.len();
543
544            let flush_left = dashes_start == text_start;
545            let flush_right = dashes_end == text_end;
546
547            col.alignment = match (flush_left, flush_right) {
548                (true, true) => Alignment::Default,
549                (true, false) => Alignment::Left,
550                (false, true) => Alignment::Right,
551                (false, false) => Alignment::Center,
552            };
553        } else {
554            // Without header, alignment based on first row (we'll handle this later)
555            col.alignment = Alignment::Default;
556        }
557    }
558}
559
560/// Try to parse a simple table starting at the given position.
561/// Returns the number of lines consumed if successful.
562pub(crate) fn try_parse_simple_table(
563    lines: &[&str],
564    start_pos: usize,
565    builder: &mut GreenNodeBuilder<'static>,
566    config: &ParserOptions,
567) -> Option<usize> {
568    log::debug!("try_parse_simple_table at line {}", start_pos + 1);
569
570    if start_pos >= lines.len() {
571        return None;
572    }
573
574    // Look for a separator line
575    let separator_pos = find_separator_line(lines, start_pos)?;
576    log::debug!("  found separator at line {}", separator_pos + 1);
577
578    let separator_line = lines[separator_pos];
579    let mut columns = try_parse_table_separator(separator_line)?;
580
581    // Determine if there's a header (separator not at start)
582    let has_header = separator_pos > start_pos;
583    let header_line = if has_header {
584        Some(lines[separator_pos - 1])
585    } else {
586        None
587    };
588
589    // Determine alignments
590    determine_alignments(&mut columns, separator_line, header_line);
591
592    // Find table end (blank line or end of input)
593    let end_pos = find_table_end(lines, separator_pos + 1);
594
595    // Must have at least one data row (or it's just a separator)
596    let data_rows = end_pos - separator_pos - 1;
597
598    if data_rows == 0 {
599        return None;
600    }
601
602    // Check for caption before table
603    let caption_before = find_caption_before_table(lines, start_pos);
604
605    // Check for caption after table
606    let caption_after = if caption_before.is_some() {
607        None
608    } else {
609        find_caption_after_table(lines, end_pos)
610    };
611
612    // Build the table
613    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
614
615    // Emit caption before if present
616    if let Some((cap_start, cap_end)) = caption_before {
617        emit_table_caption(builder, lines, cap_start, cap_end, config);
618
619        // Emit blank line between caption and table if present
620        if cap_end < start_pos {
621            for line in lines.iter().take(start_pos).skip(cap_end) {
622                if line.trim().is_empty() {
623                    builder.start_node(SyntaxKind::BLANK_LINE.into());
624                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
625                    builder.finish_node();
626                }
627            }
628        }
629    }
630
631    // Emit header if present
632    if has_header {
633        emit_table_row(
634            builder,
635            lines[separator_pos - 1],
636            &columns,
637            SyntaxKind::TABLE_HEADER,
638            config,
639        );
640    }
641
642    // Emit separator
643    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
644    emit_line_tokens(builder, separator_line);
645    builder.finish_node();
646
647    // Emit data rows
648    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
649        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
650    }
651
652    // Emit caption after if present
653    if let Some((cap_start, cap_end)) = caption_after {
654        // Emit blank line before caption if needed
655        if cap_start > end_pos {
656            for line in lines.iter().take(cap_start).skip(end_pos) {
657                if line.trim().is_empty() {
658                    builder.start_node(SyntaxKind::BLANK_LINE.into());
659                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
660                    builder.finish_node();
661                }
662            }
663        }
664        emit_table_caption(builder, lines, cap_start, cap_end, config);
665    }
666
667    builder.finish_node(); // SimpleTable
668
669    // Calculate lines consumed (including captions)
670    let table_start = if let Some((cap_start, _)) = caption_before {
671        cap_start
672    } else if has_header {
673        separator_pos - 1
674    } else {
675        separator_pos
676    };
677
678    let table_end = if let Some((_, cap_end)) = caption_after {
679        cap_end
680    } else {
681        end_pos
682    };
683
684    let lines_consumed = table_end - table_start;
685
686    Some(lines_consumed)
687}
688
689/// Find the position of a separator line starting from pos.
690fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
691    log::debug!("  find_separator_line from line {}", start_pos + 1);
692
693    // Check first line
694    log::debug!("    checking first line: {:?}", lines[start_pos]);
695    if try_parse_table_separator(lines[start_pos]).is_some() {
696        log::debug!("    separator found at first line");
697        return Some(start_pos);
698    }
699
700    // Check second line (for table with header)
701    if start_pos + 1 < lines.len()
702        && !lines[start_pos].trim().is_empty()
703        && try_parse_table_separator(lines[start_pos + 1]).is_some()
704    {
705        return Some(start_pos + 1);
706    }
707    None
708}
709
710/// Find where the table ends (first blank line or end of input).
711fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
712    for i in start_pos..lines.len() {
713        if lines[i].trim().is_empty() {
714            return i;
715        }
716        // Check if this could be a closing separator
717        if try_parse_table_separator(lines[i]).is_some() {
718            // Check if next line is blank or end
719            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
720                return i + 1;
721            }
722        }
723    }
724    lines.len()
725}
726
727/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
728/// Uses column boundaries from the separator line to extract cells.
729fn emit_table_row(
730    builder: &mut GreenNodeBuilder<'static>,
731    line: &str,
732    columns: &[Column],
733    row_kind: SyntaxKind,
734    config: &ParserOptions,
735) {
736    builder.start_node(row_kind.into());
737
738    let (line_without_newline, newline_str) = strip_newline(line);
739
740    // Emit leading whitespace if present
741    let trimmed = line_without_newline.trim_start();
742    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
743    if leading_ws_len > 0 {
744        builder.token(
745            SyntaxKind::WHITESPACE.into(),
746            &line_without_newline[..leading_ws_len],
747        );
748    }
749
750    // Track where we are in the line (for losslessness)
751    let mut current_pos = 0;
752
753    // Extract and emit cells based on column boundaries
754    for col in columns.iter() {
755        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
756        let cell_start = if col.start >= leading_ws_len {
757            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
758        } else {
759            0
760        };
761
762        let cell_end = if col.end >= leading_ws_len {
763            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
764        } else {
765            0
766        };
767
768        // Extract cell text from column bounds
769        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
770            &trimmed[cell_start..cell_end]
771        } else if cell_start < trimmed.len() {
772            &trimmed[cell_start..]
773        } else {
774            ""
775        };
776
777        let cell_content = cell_text.trim();
778        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
779
780        // Emit any whitespace from current position to start of cell content
781        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
782        if current_pos < content_abs_pos {
783            builder.token(
784                SyntaxKind::WHITESPACE.into(),
785                &trimmed[current_pos..content_abs_pos],
786            );
787        }
788
789        // Emit cell with inline parsing
790        emit_table_cell(builder, cell_content, config);
791
792        // Update current position to end of cell content
793        current_pos = content_abs_pos + cell_content.len();
794    }
795
796    // Emit any remaining whitespace after last cell
797    if current_pos < trimmed.len() {
798        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
799    }
800
801    // Emit newline if present
802    if !newline_str.is_empty() {
803        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
804    }
805
806    builder.finish_node();
807}
808
809// ============================================================================
810// Pipe Table Parsing
811// ============================================================================
812
813/// Check if a line is a pipe table separator line.
814/// Returns the column alignments if it's a valid separator.
815fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
816    let trimmed = line.trim();
817
818    // Must contain at least one pipe
819    if !trimmed.contains('|') && !trimmed.contains('+') {
820        return None;
821    }
822
823    // Split by pipes (or + for orgtbl variant)
824    let cells: Vec<&str> = if trimmed.contains('+') {
825        // Orgtbl variant: use + as separator in separator line
826        trimmed.split(['|', '+']).collect()
827    } else {
828        trimmed.split('|').collect()
829    };
830
831    let mut alignments = Vec::new();
832
833    for cell in cells {
834        let cell = cell.trim();
835
836        // Skip empty cells (from leading/trailing pipes)
837        if cell.is_empty() {
838            continue;
839        }
840
841        // Must be dashes with optional colons
842        let starts_colon = cell.starts_with(':');
843        let ends_colon = cell.ends_with(':');
844
845        // Remove colons to check if rest is all dashes
846        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
847
848        // Must have at least one dash
849        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
850            return None;
851        }
852
853        // Determine alignment from colon positions
854        let alignment = match (starts_colon, ends_colon) {
855            (true, true) => Alignment::Center,
856            (true, false) => Alignment::Left,
857            (false, true) => Alignment::Right,
858            (false, false) => Alignment::Default,
859        };
860
861        alignments.push(alignment);
862    }
863
864    // Must have at least one column
865    if alignments.is_empty() {
866        None
867    } else {
868        Some(alignments)
869    }
870}
871
/// Split a pipe table row into its trimmed cell texts.
///
/// A backslash escapes the character that follows it, so `\|` stays inside
/// the cell (backslash included) while the pipe in `\\|` is a real delimiter.
/// This matches how the row emitter locates unescaped pipes. A leading pipe
/// is skipped, and a trailing cell is only reported when it is non-empty
/// (it is empty whenever the row ends with a pipe).
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = trimmed.chars();
    let mut at_row_start = true;

    while let Some(ch) = chars.next() {
        let is_first_char = at_row_start;
        at_row_start = false;

        match ch {
            '\\' => {
                // Keep the escape sequence verbatim; whatever follows the
                // backslash can never act as a delimiter.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            // The leading pipe opens the first cell rather than closing one.
            '|' if is_first_char => {}
            '|' => {
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // Text after the last pipe forms a final cell unless it is blank.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
919
920/// Emit a pipe table row with inline-parsed cells.
921/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
922fn emit_pipe_table_row(
923    builder: &mut GreenNodeBuilder<'static>,
924    line: &str,
925    row_kind: SyntaxKind,
926    config: &ParserOptions,
927) {
928    builder.start_node(row_kind.into());
929
930    let (line_without_newline, newline_str) = strip_newline(line);
931    let trimmed = line_without_newline.trim();
932
933    // Parse cell boundaries
934    let mut cell_starts = Vec::new();
935    let mut cell_ends = Vec::new();
936    let mut in_escape = false;
937
938    // Find all pipe positions (excluding escaped ones)
939    let mut pipe_positions = Vec::new();
940    for (i, ch) in trimmed.char_indices() {
941        if in_escape {
942            in_escape = false;
943            continue;
944        }
945        if ch == '\\' {
946            in_escape = true;
947            continue;
948        }
949        if ch == '|' {
950            pipe_positions.push(i);
951        }
952    }
953
954    // Determine cell boundaries based on pipe positions
955    if pipe_positions.is_empty() {
956        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
957        cell_starts.push(0);
958        cell_ends.push(trimmed.len());
959    } else {
960        // Check if line starts with pipe
961        let start_pipe = pipe_positions.first() == Some(&0);
962        // Check if line ends with pipe
963        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
964
965        if start_pipe {
966            // Skip first pipe
967            for i in 1..pipe_positions.len() {
968                cell_starts.push(pipe_positions[i - 1] + 1);
969                cell_ends.push(pipe_positions[i]);
970            }
971            // Add last cell if there's no trailing pipe
972            if !end_pipe {
973                cell_starts.push(*pipe_positions.last().unwrap() + 1);
974                cell_ends.push(trimmed.len());
975            }
976        } else {
977            // No leading pipe
978            cell_starts.push(0);
979            cell_ends.push(pipe_positions[0]);
980
981            for i in 1..pipe_positions.len() {
982                cell_starts.push(pipe_positions[i - 1] + 1);
983                cell_ends.push(pipe_positions[i]);
984            }
985
986            // Add last cell if there's no trailing pipe
987            if !end_pipe {
988                cell_starts.push(*pipe_positions.last().unwrap() + 1);
989                cell_ends.push(trimmed.len());
990            }
991        }
992    }
993
994    // Emit leading whitespace if present (before trim)
995    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
996    if leading_ws_len > 0 {
997        builder.token(
998            SyntaxKind::WHITESPACE.into(),
999            &line_without_newline[..leading_ws_len],
1000        );
1001    }
1002
1003    // Emit cells with pipes
1004    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1005        // Emit pipe before cell (except for first cell if no leading pipe)
1006        if *start > 0 {
1007            builder.token(SyntaxKind::TEXT.into(), "|");
1008        } else if idx == 0 && trimmed.starts_with('|') {
1009            // Leading pipe
1010            builder.token(SyntaxKind::TEXT.into(), "|");
1011        }
1012
1013        // Get cell content with its whitespace
1014        let cell_with_ws = &trimmed[*start..*end];
1015        let cell_content = cell_with_ws.trim();
1016
1017        // Emit leading whitespace within cell
1018        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1019        if !cell_leading_ws.is_empty() {
1020            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1021        }
1022
1023        // Emit cell with inline parsing
1024        emit_table_cell(builder, cell_content, config);
1025
1026        // Emit trailing whitespace within cell
1027        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1028        if cell_trailing_ws_start < cell_with_ws.len() {
1029            builder.token(
1030                SyntaxKind::WHITESPACE.into(),
1031                &cell_with_ws[cell_trailing_ws_start..],
1032            );
1033        }
1034    }
1035
1036    // Emit trailing pipe if present
1037    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1038        builder.token(SyntaxKind::TEXT.into(), "|");
1039    }
1040
1041    // Emit trailing whitespace after trim (before newline)
1042    let trailing_ws_start = leading_ws_len + trimmed.len();
1043    if trailing_ws_start < line_without_newline.len() {
1044        builder.token(
1045            SyntaxKind::WHITESPACE.into(),
1046            &line_without_newline[trailing_ws_start..],
1047        );
1048    }
1049
1050    // Emit newline
1051    if !newline_str.is_empty() {
1052        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1053    }
1054
1055    builder.finish_node();
1056}
1057
1058/// Try to parse a pipe table starting at the given position.
1059/// Returns the number of lines consumed if successful.
1060pub(crate) fn try_parse_pipe_table(
1061    lines: &[&str],
1062    start_pos: usize,
1063    builder: &mut GreenNodeBuilder<'static>,
1064    config: &ParserOptions,
1065) -> Option<usize> {
1066    if start_pos + 1 >= lines.len() {
1067        return None;
1068    }
1069
1070    // Check if this line is a caption followed by a table
1071    // If so, the actual table starts after the caption and blank line
1072    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1073        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1074        let mut pos = cap_end;
1075        while pos < lines.len() && lines[pos].trim().is_empty() {
1076            pos += 1;
1077        }
1078        (pos, Some((cap_start, cap_end)))
1079    } else {
1080        (start_pos, None)
1081    };
1082
1083    if actual_start + 1 >= lines.len() {
1084        return None;
1085    }
1086
1087    // First line should have pipes (potential header)
1088    let header_line = lines[actual_start];
1089    if !header_line.contains('|') {
1090        return None;
1091    }
1092
1093    // Second line should be separator
1094    let separator_line = lines[actual_start + 1];
1095    let alignments = try_parse_pipe_separator(separator_line)?;
1096
1097    // Parse header cells
1098    let header_cells = parse_pipe_table_row(header_line);
1099
1100    // Number of columns should match (approximately - be lenient)
1101    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1102        // Only fail if very different
1103        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1104            return None;
1105        }
1106    }
1107
1108    // Find table end (first blank line or end of input)
1109    let mut end_pos = actual_start + 2;
1110    while end_pos < lines.len() {
1111        let line = lines[end_pos];
1112        if line.trim().is_empty() {
1113            break;
1114        }
1115        // Row should have pipes
1116        if !line.contains('|') {
1117            break;
1118        }
1119        end_pos += 1;
1120    }
1121
1122    // Must have at least one data row
1123    if end_pos <= actual_start + 2 {
1124        return None;
1125    }
1126
1127    // Check for caption before table (only if we didn't already detect it)
1128    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1129
1130    // Check for caption after table
1131    let caption_after = if caption_before.is_some() {
1132        None
1133    } else {
1134        find_caption_after_table(lines, end_pos)
1135    };
1136
1137    // Build the pipe table
1138    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1139
1140    // Emit caption before if present
1141    if let Some((cap_start, cap_end)) = caption_before {
1142        emit_table_caption(builder, lines, cap_start, cap_end, config);
1143        // Emit blank line between caption and table if present
1144        if cap_end < actual_start {
1145            for line in lines.iter().take(actual_start).skip(cap_end) {
1146                if line.trim().is_empty() {
1147                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1148                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1149                    builder.finish_node();
1150                }
1151            }
1152        }
1153    }
1154
1155    // Emit header row with inline-parsed cells
1156    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1157
1158    // Emit separator
1159    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1160    emit_line_tokens(builder, separator_line);
1161    builder.finish_node();
1162
1163    // Emit data rows with inline-parsed cells
1164    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1165        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1166    }
1167
1168    // Emit caption after if present
1169    if let Some((cap_start, cap_end)) = caption_after {
1170        // Emit blank line before caption if needed
1171        if cap_start > end_pos {
1172            for line in lines.iter().take(cap_start).skip(end_pos) {
1173                if line.trim().is_empty() {
1174                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1175                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1176                    builder.finish_node();
1177                }
1178            }
1179        }
1180        emit_table_caption(builder, lines, cap_start, cap_end, config);
1181    }
1182
1183    builder.finish_node(); // PipeTable
1184
1185    // Calculate lines consumed
1186    let table_start = caption_before
1187        .map(|(start, _)| start)
1188        .unwrap_or(actual_start);
1189    let table_end = if let Some((_, cap_end)) = caption_after {
1190        cap_end
1191    } else {
1192        end_pos
1193    };
1194
1195    Some(table_end - table_start)
1196}
1197
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the pipe table parser on `lines` from `start` with default options
    /// and return the number of lines it consumed.
    fn pipe_table_lines_consumed(lines: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_pipe_table(lines, start, &mut builder, &ParserOptions::default())
    }

    // Simple table tests
    #[test]
    fn test_separator_detection() {
        let multi_width = try_parse_table_separator("------- ------ ----------   -------");
        assert!(multi_width.is_some());

        let indented = try_parse_table_separator("  ---  ---  ---");
        assert!(indented.is_some());

        // A single dash run is a horizontal rule.
        let horizontal_rule = try_parse_table_separator("-------");
        assert!(horizontal_rule.is_none());

        // Several dash runs form a table separator.
        let three_columns = try_parse_table_separator("--- --- ---");
        assert!(three_columns.is_some());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let table = vec![
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        let consumed = try_parse_simple_table(&table, 0, &mut builder, &ParserOptions::default());

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let table = vec![
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        let consumed = try_parse_simple_table(&table, 0, &mut builder, &ParserOptions::default());

        // sep + 2 rows
        assert_eq!(consumed, Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        // Accepted prefixes
        assert!(try_parse_caption_prefix("Table: My caption").is_some());
        assert!(try_parse_caption_prefix("table: My caption").is_some());
        assert!(try_parse_caption_prefix(": My caption").is_some());

        // Just colon, no content
        assert!(try_parse_caption_prefix(":").is_none());
        // No caption prefix at all
        assert!(try_parse_caption_prefix("Not a caption").is_none());
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let tree = crate::parse("Term\n: ```\n  code\n  ```\n", None);
        let has_kind =
            |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            has_kind(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            has_kind(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !has_kind(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = concat!(
            "::: {#tbl:panel layout.nrow=\"1\"}\n",
            "  : My Caption {#tbl:foo-1}\n",
            "\n",
            "  | Col1 | Col2 | Col3 |\n",
            "  | ---- | ---- | ---- |\n",
            "  | A    | B    | C    |\n",
            "  | E    | F    | G    |\n",
            "  | A    | G    | G    |\n",
            "\n",
            "  : My Caption2 {#tbl:foo-2}\n",
            "\n",
            "  | Col1 | Col2 | Col3 |\n",
            "  | ---- | ---- | ---- |\n",
            "  | A    | B    | C    |\n",
            "  | E    | F    | G    |\n",
            "  | A    | G    | G    |\n",
            "\n",
            "Caption\n",
            ":::\n",
        );
        let tree = crate::parse(input, None);

        let mut caption_count = 0;
        let mut saw_definition_list = false;
        for node in tree.descendants() {
            if node.kind() == SyntaxKind::TABLE_CAPTION {
                caption_count += 1;
            }
            if node.kind() == SyntaxKind::DEFINITION_LIST {
                saw_definition_list = true;
            }
        }

        assert_eq!(
            caption_count, 2,
            "expected both captions to attach to tables"
        );
        assert!(
            !saw_definition_list,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let table = vec![
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        let consumed = try_parse_simple_table(&table, 0, &mut builder, &ParserOptions::default());

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let table = vec![
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        // Parsing starts at the header line, two lines below the caption.
        let consumed = try_parse_simple_table(&table, 2, &mut builder, &ParserOptions::default());

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let table = vec![
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        let consumed = try_parse_simple_table(&table, 0, &mut builder, &ParserOptions::default());

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let table = vec![
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        let consumed = try_parse_simple_table(&table, 0, &mut builder, &ParserOptions::default());

        // Should consume through end of multi-line caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let table = vec![
            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
            "--------------  ------------ ------- ---------------- ----------------- ------------",
            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
            "",
        ];
        let mut builder = GreenNodeBuilder::new();

        let consumed = try_parse_simple_table(&table, 0, &mut builder, &ParserOptions::default());

        assert_eq!(consumed, Some(4));
    }

    // Pipe table tests
    #[test]
    fn test_pipe_separator_detection() {
        assert!(try_parse_pipe_separator("|------:|:-----|---------|:------:|").is_some());
        assert!(try_parse_pipe_separator("|---|---|").is_some());
        // No leading pipe
        assert!(try_parse_pipe_separator("-----|-----:").is_some());
        // Orgtbl variant
        assert!(try_parse_pipe_separator("|-----+-------|").is_some());
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();

        assert_eq!(aligns.len(), 4);
        let expected = [
            Alignment::Right,
            Alignment::Left,
            Alignment::Default,
            Alignment::Center,
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(aligns[idx], *want);
        }
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let with_edges = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(with_edges.len(), 3);
        assert_eq!(with_edges[0], "Right");
        assert_eq!(with_edges[1], "Left");
        assert_eq!(with_edges[2], "Center");

        // Without leading/trailing pipes
        let without_edges = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(without_edges.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let table = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(pipe_table_lines_consumed(&table, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let table = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        assert_eq!(pipe_table_lines_consumed(&table, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let table = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(pipe_table_lines_consumed(&table, 1), Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let table = [
            ": (#tab:base) base R quoting",
            "functions",
            "",
            "| C | D |",
            "|---|---|",
            "| 3 | 4 |",
            "",
        ];

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(pipe_table_lines_consumed(&table, 0), Some(6));
    }
}
1506
1507// ============================================================================
1508// Grid Table Parsing
1509// ============================================================================
1510
1511/// Check if a line is a grid table row separator (starts with +, contains -, ends with +).
1512/// Returns Some(vec of column info) if valid, None otherwise.
1513fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
1514    let trimmed = line.trim_start();
1515    let leading_spaces = line.len() - trimmed.len();
1516
1517    // Must have leading spaces <= 3 to not be a code block
1518    if leading_spaces > 3 {
1519        return None;
1520    }
1521
1522    // Must start with + and end with +
1523    if !trimmed.starts_with('+') || !trimmed.trim_end().ends_with('+') {
1524        return None;
1525    }
1526
1527    // Split by + to get column segments
1528    let trimmed = trimmed.trim_end();
1529    let segments: Vec<&str> = trimmed.split('+').collect();
1530
1531    // Need at least 3 parts: empty before first +, column(s), empty after last +
1532    if segments.len() < 3 {
1533        return None;
1534    }
1535
1536    let mut columns = Vec::new();
1537
1538    // Parse each segment between + signs
1539    for segment in segments.iter().skip(1).take(segments.len() - 2) {
1540        if segment.is_empty() {
1541            continue;
1542        }
1543
1544        // Segment must be dashes/equals with optional colons for alignment
1545        let seg_trimmed = *segment;
1546
1547        // Get the fill character (after removing colons)
1548        let inner = seg_trimmed.trim_start_matches(':').trim_end_matches(':');
1549
1550        // Must be all dashes or all equals
1551        if inner.is_empty() {
1552            return None;
1553        }
1554
1555        let first_char = inner.chars().next().unwrap();
1556        if first_char != '-' && first_char != '=' {
1557            return None;
1558        }
1559
1560        if !inner.chars().all(|c| c == first_char) {
1561            return None;
1562        }
1563
1564        let is_header_sep = first_char == '=';
1565
1566        columns.push(GridColumn {
1567            is_header_separator: is_header_sep,
1568            width: seg_trimmed.chars().count(),
1569        });
1570    }
1571
1572    if columns.is_empty() {
1573        None
1574    } else {
1575        Some(columns)
1576    }
1577}
1578
/// One column of a grid table, as described by a single segment of a
/// `+---+---+` border line.
#[derive(Clone, Debug)]
struct GridColumn {
    /// Whether the border segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Width of the border segment in characters, alignment colons included.
    width: usize,
}
1585
1586fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1587    let mut end_byte = start_byte;
1588    let mut display_cols = 0usize;
1589
1590    for (offset, ch) in line[start_byte..].char_indices() {
1591        if ch == '|' {
1592            let sep_byte = start_byte + offset;
1593            return (sep_byte, sep_byte + 1);
1594        }
1595        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1596        if display_cols + ch_width > width {
1597            break;
1598        }
1599        display_cols += ch_width;
1600        end_byte = start_byte + offset + ch.len_utf8();
1601        if display_cols >= width {
1602            break;
1603        }
1604    }
1605
1606    // If the width budget is exhausted before seeing a separator (for example
1607    // because of padding/layout drift), advance to the next literal separator
1608    // to keep row slicing aligned and preserve losslessness.
1609    let mut sep_byte = end_byte;
1610    while sep_byte < line.len() {
1611        let mut chars = line[sep_byte..].chars();
1612        let Some(ch) = chars.next() else {
1613            break;
1614        };
1615        if ch == '|' {
1616            return (sep_byte, sep_byte + 1);
1617        }
1618        sep_byte += ch.len_utf8();
1619    }
1620
1621    (end_byte, end_byte)
1622}
1623
/// Tell whether `line` looks like a content row of a grid table.
///
/// The line may carry at most three bytes of leading whitespace. Ignoring
/// surrounding whitespace it must begin with `|` and end with either `|`
/// (normal row) or `+` (spanning-style continuation line).
fn is_grid_content_row(line: &str) -> bool {
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();
    if indent > 3 {
        return false;
    }

    let row = unindented.trim_end();
    if !row.starts_with('|') {
        return false;
    }
    matches!(row.chars().next_back(), Some('|' | '+'))
}
1637
1638/// Extract cell contents from a single grid table row line.
1639/// Returns a vector of cell contents (trimmed) based on column boundaries.
1640/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1641fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1642    let (line_content, _) = strip_newline(line);
1643    let line_trimmed = line_content.trim();
1644
1645    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1646        return vec![String::new(); _columns.len()];
1647    }
1648
1649    let mut cells = Vec::with_capacity(_columns.len());
1650    let mut pos_byte = 1; // Skip leading pipe
1651
1652    for col in _columns {
1653        let col_idx = cells.len();
1654        if pos_byte >= line_trimmed.len() {
1655            cells.push(String::new());
1656            continue;
1657        }
1658
1659        let start_byte = pos_byte;
1660        let end_byte = if col_idx + 1 == _columns.len() {
1661            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1662        } else {
1663            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1664            pos_byte = next_start;
1665            end
1666        };
1667        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1668        if col_idx + 1 == _columns.len() {
1669            pos_byte = line_trimmed.len();
1670        }
1671    }
1672
1673    cells
1674}
1675
1676/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1677/// Concatenates cell contents across lines with newlines, then trims.
1678fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1679    if lines.is_empty() {
1680        return vec![String::new(); columns.len()];
1681    }
1682
1683    extract_grid_cells_from_line(lines[0], columns)
1684}
1685
1686/// Emit a grid table row with inline-parsed cells.
1687/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1688/// then continuation lines as raw TEXT for losslessness.
1689fn emit_grid_table_row(
1690    builder: &mut GreenNodeBuilder<'static>,
1691    lines: &[&str],
1692    columns: &[GridColumn],
1693    row_kind: SyntaxKind,
1694    config: &ParserOptions,
1695) {
1696    if lines.is_empty() {
1697        return;
1698    }
1699
1700    // Extract cell contents from the first line.
1701    let cell_contents = extract_grid_cells_multiline(lines, columns);
1702
1703    builder.start_node(row_kind.into());
1704
1705    // Emit first line with TABLE_CELL nodes
1706    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1707    let first_line = lines[0];
1708    let (line_without_newline, newline_str) = strip_newline(first_line);
1709    let trimmed = line_without_newline.trim();
1710    let expected_pipe_count = columns.len().saturating_add(1);
1711    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1712
1713    // Rows that don't contain all expected column separators (spanning-style rows)
1714    // must be emitted verbatim for losslessness.
1715    if actual_pipe_count != expected_pipe_count {
1716        emit_line_tokens(builder, first_line);
1717        for line in lines.iter().skip(1) {
1718            emit_line_tokens(builder, line);
1719        }
1720        builder.finish_node();
1721        return;
1722    }
1723
1724    // Emit leading whitespace
1725    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1726    if leading_ws_len > 0 {
1727        builder.token(
1728            SyntaxKind::WHITESPACE.into(),
1729            &line_without_newline[..leading_ws_len],
1730        );
1731    }
1732
1733    // Emit leading pipe
1734    if trimmed.starts_with('|') {
1735        builder.token(SyntaxKind::TEXT.into(), "|");
1736    }
1737
1738    // Emit each cell based on fixed column widths from separators
1739    let mut pos_byte = 1usize; // after leading pipe
1740    for (idx, cell_content) in cell_contents.iter().enumerate() {
1741        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1742            let start_byte = pos_byte;
1743            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1744                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1745            } else {
1746                let (end, next_start) =
1747                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1748                pos_byte = next_start;
1749                end
1750            };
1751            let slice = &trimmed[start_byte..end_byte];
1752            if idx + 1 == columns.len() {
1753                pos_byte = trimmed.len();
1754            }
1755            slice
1756        } else {
1757            ""
1758        };
1759
1760        // Emit leading whitespace in cell
1761        let cell_trimmed = part.trim();
1762        let ws_start_len = part.len() - part.trim_start().len();
1763        if ws_start_len > 0 {
1764            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1765        }
1766
1767        // Emit TABLE_CELL with inline parsing
1768        emit_table_cell(builder, cell_content, config);
1769
1770        // Emit trailing whitespace in cell
1771        let ws_end_start = ws_start_len + cell_trimmed.len();
1772        if ws_end_start < part.len() {
1773            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1774        }
1775
1776        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1777        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1778            builder.token(SyntaxKind::TEXT.into(), "|");
1779        }
1780    }
1781
1782    // Emit trailing whitespace before newline
1783    let trailing_ws_start = leading_ws_len + trimmed.len();
1784    if trailing_ws_start < line_without_newline.len() {
1785        builder.token(
1786            SyntaxKind::WHITESPACE.into(),
1787            &line_without_newline[trailing_ws_start..],
1788        );
1789    }
1790
1791    // Emit newline
1792    if !newline_str.is_empty() {
1793        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1794    }
1795
1796    // Emit continuation lines as TEXT for losslessness
1797    for line in lines.iter().skip(1) {
1798        emit_line_tokens(builder, line);
1799    }
1800
1801    builder.finish_node();
1802}
1803
1804/// Try to parse a grid table starting at the given position.
1805/// Returns the number of lines consumed if successful.
1806pub(crate) fn try_parse_grid_table(
1807    lines: &[&str],
1808    start_pos: usize,
1809    builder: &mut GreenNodeBuilder<'static>,
1810    config: &ParserOptions,
1811) -> Option<usize> {
1812    if start_pos >= lines.len() {
1813        return None;
1814    }
1815
1816    // Check if this line is a caption followed by a table
1817    // If so, the actual table starts after the caption and blank line
1818    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1819        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1820        let mut pos = cap_end;
1821        while pos < lines.len() && lines[pos].trim().is_empty() {
1822            pos += 1;
1823        }
1824        (pos, Some((cap_start, cap_end)))
1825    } else {
1826        (start_pos, None)
1827    };
1828
1829    if actual_start >= lines.len() {
1830        return None;
1831    }
1832
1833    // First line must be a grid separator
1834    let first_line = lines[actual_start];
1835    let _columns = try_parse_grid_separator(first_line)?;
1836
1837    // Track table structure
1838    let mut end_pos = actual_start + 1;
1839    let mut found_header_sep = false;
1840    let mut in_footer = false;
1841
1842    // Scan table lines
1843    while end_pos < lines.len() {
1844        let line = lines[end_pos];
1845
1846        // Check for blank line (table ends)
1847        if line.trim().is_empty() {
1848            break;
1849        }
1850
1851        // Check for separator line
1852        if let Some(sep_cols) = try_parse_grid_separator(line) {
1853            // Check if this is a header separator (=)
1854            if sep_cols.iter().any(|c| c.is_header_separator) {
1855                if !found_header_sep {
1856                    found_header_sep = true;
1857                } else if !in_footer {
1858                    // Second = separator starts footer
1859                    in_footer = true;
1860                }
1861            }
1862            end_pos += 1;
1863            continue;
1864        }
1865
1866        // Check for content row
1867        if is_grid_content_row(line) {
1868            end_pos += 1;
1869            continue;
1870        }
1871
1872        // Not a valid grid table line - table ends
1873        break;
1874    }
1875
1876    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1877    // Or just top + content rows that end with a separator
1878    if end_pos <= actual_start + 1 {
1879        return None;
1880    }
1881
1882    // Last consumed line should be a separator for a well-formed table
1883    // But we'll be lenient and accept tables ending with content rows
1884
1885    // Check for caption before table (only if we didn't already detected it)
1886    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1887
1888    // Check for caption after table
1889    let caption_after = if caption_before.is_some() {
1890        None
1891    } else {
1892        find_caption_after_table(lines, end_pos)
1893    };
1894
1895    // Build the grid table
1896    builder.start_node(SyntaxKind::GRID_TABLE.into());
1897
1898    // Emit caption before if present
1899    if let Some((cap_start, cap_end)) = caption_before {
1900        emit_table_caption(builder, lines, cap_start, cap_end, config);
1901        // Emit blank line between caption and table if present
1902        if cap_end < actual_start {
1903            for line in lines.iter().take(actual_start).skip(cap_end) {
1904                if line.trim().is_empty() {
1905                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1906                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1907                    builder.finish_node();
1908                }
1909            }
1910        }
1911    }
1912
1913    // Track whether we've passed the header separator
1914    let mut past_header_sep = false;
1915    let mut in_footer_section = false;
1916    let mut current_row_lines: Vec<&str> = Vec::new();
1917    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1918
1919    // Emit table rows - accumulate multi-line cells
1920    for line in lines.iter().take(end_pos).skip(actual_start) {
1921        if let Some(sep_cols) = try_parse_grid_separator(line) {
1922            // Separator line - emit any accumulated row first
1923            if !current_row_lines.is_empty() {
1924                emit_grid_table_row(
1925                    builder,
1926                    &current_row_lines,
1927                    &sep_cols,
1928                    current_row_kind,
1929                    config,
1930                );
1931                current_row_lines.clear();
1932            }
1933
1934            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1935
1936            if is_header_sep {
1937                if !past_header_sep {
1938                    // This is the header/body separator
1939                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1940                    emit_line_tokens(builder, line);
1941                    builder.finish_node();
1942                    past_header_sep = true;
1943                } else {
1944                    // Footer separator
1945                    if !in_footer_section {
1946                        in_footer_section = true;
1947                    }
1948                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1949                    emit_line_tokens(builder, line);
1950                    builder.finish_node();
1951                }
1952            } else {
1953                // Regular separator (row boundary)
1954                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1955                emit_line_tokens(builder, line);
1956                builder.finish_node();
1957            }
1958        } else if is_grid_content_row(line) {
1959            // Content row - accumulate for multi-line cells
1960            current_row_kind = if !past_header_sep && found_header_sep {
1961                SyntaxKind::TABLE_HEADER
1962            } else if in_footer_section {
1963                SyntaxKind::TABLE_FOOTER
1964            } else {
1965                SyntaxKind::TABLE_ROW
1966            };
1967
1968            current_row_lines.push(line);
1969        }
1970    }
1971
1972    // Emit any remaining accumulated row
1973    if !current_row_lines.is_empty() {
1974        // Use first separator's columns for cell boundaries
1975        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
1976            emit_grid_table_row(
1977                builder,
1978                &current_row_lines,
1979                &sep_cols,
1980                current_row_kind,
1981                config,
1982            );
1983        }
1984    }
1985
1986    // Emit caption after if present
1987    if let Some((cap_start, cap_end)) = caption_after {
1988        if cap_start > end_pos {
1989            for line in lines.iter().take(cap_start).skip(end_pos) {
1990                if line.trim().is_empty() {
1991                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1992                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1993                    builder.finish_node();
1994                }
1995            }
1996        }
1997        emit_table_caption(builder, lines, cap_start, cap_end, config);
1998    }
1999
2000    builder.finish_node(); // GRID_TABLE
2001
2002    // Calculate lines consumed
2003    let table_start = caption_before
2004        .map(|(start, _)| start)
2005        .unwrap_or(actual_start);
2006    let table_end = if let Some((_, cap_end)) = caption_after {
2007        cap_end
2008    } else {
2009        end_pos
2010    };
2011
2012    Some(table_end - table_start)
2013}
2014
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Run the grid-table parser over `input` from line `start` with default
    /// options and a throwaway builder, returning the consumed line count.
    fn parse_grid(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let separators = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for sep in separators.iter() {
            assert!(
                try_parse_grid_separator(sep).is_some(),
                "expected a grid separator: {}",
                sep
            );
        }

        let non_separators = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in non_separators.iter() {
            assert!(
                try_parse_grid_separator(line).is_none(),
                "expected no grid separator: {}",
                line
            );
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|c| c.is_header_separator));

        let plain_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(!plain_cols.iter().any(|c| c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        let content_rows = ["| content | content |", "|  |  |", "| content +------+"];
        for row in content_rows.iter() {
            assert!(is_grid_content_row(row), "expected a content row: {}", row);
        }

        let other_lines = [
            "+---+---+", // separator, not content
            "no pipes here",
        ];
        for line in other_lines.iter() {
            assert!(!is_grid_content_row(line), "expected no content row: {}", line);
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(parse_grid(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(parse_grid(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(parse_grid(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(parse_grid(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts at the top border; the count still covers
        // caption + blank + table.
        assert_eq!(parse_grid(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(parse_grid(&input, 0), Some(7));
    }
}
2169
2170// ============================================================================
2171// Multiline Table Parsing
2172// ============================================================================
2173
2174/// Check if a line is a multiline table separator (continuous dashes).
2175/// Multiline table separators span the full width and are all dashes.
2176/// Returns Some(columns) if valid, None otherwise.
2177fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2178    let trimmed = line.trim_start();
2179    let leading_spaces = line.len() - trimmed.len();
2180
2181    // Must have leading spaces <= 3 to not be a code block
2182    if leading_spaces > 3 {
2183        return None;
2184    }
2185
2186    let trimmed = trimmed.trim_end();
2187
2188    // Must be all dashes (continuous line of dashes)
2189    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2190        return None;
2191    }
2192
2193    // Must have at least 3 dashes
2194    if trimmed.len() < 3 {
2195        return None;
2196    }
2197
2198    // This is a full-width separator - columns will be determined by column separator lines
2199    Some(vec![Column {
2200        start: leading_spaces,
2201        end: leading_spaces + trimmed.len(),
2202        alignment: Alignment::Default,
2203    }])
2204}
2205
2206/// Check if a line is a column separator line for multiline tables.
2207/// Column separators have dashes with spaces between them to define columns.
2208fn is_column_separator(line: &str) -> bool {
2209    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2210}
2211
2212fn is_headerless_single_row_without_blank(
2213    lines: &[&str],
2214    row_start: usize,
2215    row_end: usize,
2216    columns: &[Column],
2217) -> bool {
2218    if row_start >= row_end {
2219        return false;
2220    }
2221
2222    if row_end - row_start == 1 {
2223        return false;
2224    }
2225
2226    let Some(last_col) = columns.last() else {
2227        return false;
2228    };
2229
2230    for line in lines.iter().take(row_end).skip(row_start + 1) {
2231        let (content, _) = strip_newline(line);
2232        let prefix_end = last_col.start.min(content.len());
2233        if !content[..prefix_end].trim().is_empty() {
2234            return false;
2235        }
2236    }
2237
2238    true
2239}
2240
2241/// Try to parse a multiline table starting at the given position.
2242/// Returns the number of lines consumed if successful.
2243pub(crate) fn try_parse_multiline_table(
2244    lines: &[&str],
2245    start_pos: usize,
2246    builder: &mut GreenNodeBuilder<'static>,
2247    config: &ParserOptions,
2248) -> Option<usize> {
2249    if start_pos >= lines.len() {
2250        return None;
2251    }
2252
2253    let first_line = lines[start_pos];
2254
2255    // First line can be either:
2256    // 1. A full-width dash separator (for tables with headers)
2257    // 2. A column separator (for headerless tables)
2258    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2259    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2260    let headerless_columns = if is_column_sep_start {
2261        try_parse_table_separator(first_line)
2262    } else {
2263        None
2264    };
2265
2266    if !is_full_width_start && !is_column_sep_start {
2267        return None;
2268    }
2269
2270    // Look ahead to find the structure
2271    let mut pos = start_pos + 1;
2272    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2273    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2274    let mut has_header = false;
2275    let mut found_blank_line = false;
2276    let mut found_closing_sep = false;
2277    let mut content_line_count = 0usize;
2278
2279    // Scan for header section and column separator
2280    while pos < lines.len() {
2281        let line = lines[pos];
2282
2283        // Check for column separator (defines columns) - only if we started with full-width
2284        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2285            found_column_sep = true;
2286            column_sep_pos = pos;
2287            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2288            pos += 1;
2289            continue;
2290        }
2291
2292        // Check for blank line (row separator in body)
2293        if line.trim().is_empty() {
2294            found_blank_line = true;
2295            pos += 1;
2296            // Check if next line is a valid closing separator for this table shape.
2297            if pos < lines.len() {
2298                let next = lines[pos];
2299                let is_valid_closer = if is_full_width_start {
2300                    try_parse_multiline_separator(next).is_some()
2301                } else {
2302                    is_column_separator(next)
2303                };
2304                if is_valid_closer {
2305                    found_closing_sep = true;
2306                    pos += 1; // Include the closing separator
2307                    break;
2308                }
2309            }
2310            continue;
2311        }
2312
2313        // Check for closing full-width dashes (only for full-width-start tables).
2314        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2315            found_closing_sep = true;
2316            pos += 1;
2317            break;
2318        }
2319
2320        // Check for closing column separator (for headerless tables)
2321        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2322            found_closing_sep = true;
2323            pos += 1;
2324            break;
2325        }
2326
2327        // Content row
2328        content_line_count += 1;
2329        pos += 1;
2330    }
2331
2332    // Must have found a column separator to be a valid multiline table
2333    if !found_column_sep {
2334        return None;
2335    }
2336
2337    // Must have had at least one blank line between rows (distinguishes from simple tables)
2338    if !found_blank_line {
2339        if !is_column_sep_start {
2340            return None;
2341        }
2342        let columns = headerless_columns.as_deref()?;
2343        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2344            return None;
2345        }
2346    }
2347
2348    // Must have a closing separator
2349    if !found_closing_sep {
2350        return None;
2351    }
2352
2353    // Must have consumed more than just the opening separator
2354    if pos <= start_pos + 2 {
2355        return None;
2356    }
2357
2358    let end_pos = pos;
2359
2360    // Extract column boundaries from the separator line
2361    let columns =
2362        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2363
2364    // Check for caption before table
2365    let caption_before = find_caption_before_table(lines, start_pos);
2366
2367    // Check for caption after table
2368    let caption_after = if caption_before.is_some() {
2369        None
2370    } else {
2371        find_caption_after_table(lines, end_pos)
2372    };
2373
2374    // Build the multiline table
2375    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2376
2377    // Emit caption before if present
2378    if let Some((cap_start, cap_end)) = caption_before {
2379        emit_table_caption(builder, lines, cap_start, cap_end, config);
2380
2381        // Emit blank line between caption and table if present
2382        if cap_end < start_pos {
2383            for line in lines.iter().take(start_pos).skip(cap_end) {
2384                if line.trim().is_empty() {
2385                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2386                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2387                    builder.finish_node();
2388                }
2389            }
2390        }
2391    }
2392
2393    // Emit opening separator
2394    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2395    emit_line_tokens(builder, lines[start_pos]);
2396    builder.finish_node();
2397
2398    // Track state for emitting
2399    let mut in_header = has_header;
2400    let mut current_row_lines: Vec<&str> = Vec::new();
2401
2402    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2403        // Column separator (header/body divider)
2404        if i == column_sep_pos {
2405            // Emit any accumulated header lines
2406            if !current_row_lines.is_empty() {
2407                emit_multiline_table_row(
2408                    builder,
2409                    &current_row_lines,
2410                    &columns,
2411                    SyntaxKind::TABLE_HEADER,
2412                    config,
2413                );
2414                current_row_lines.clear();
2415            }
2416
2417            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2418            emit_line_tokens(builder, line);
2419            builder.finish_node();
2420            in_header = false;
2421            continue;
2422        }
2423
2424        // Closing separator (full-width or column separator at end)
2425        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2426            // Emit any accumulated row lines
2427            if !current_row_lines.is_empty() {
2428                let kind = if in_header {
2429                    SyntaxKind::TABLE_HEADER
2430                } else {
2431                    SyntaxKind::TABLE_ROW
2432                };
2433                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2434                current_row_lines.clear();
2435            }
2436
2437            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2438            emit_line_tokens(builder, line);
2439            builder.finish_node();
2440            continue;
2441        }
2442
2443        // Blank line (row separator)
2444        if line.trim().is_empty() {
2445            // Emit accumulated row
2446            if !current_row_lines.is_empty() {
2447                let kind = if in_header {
2448                    SyntaxKind::TABLE_HEADER
2449                } else {
2450                    SyntaxKind::TABLE_ROW
2451                };
2452                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2453                current_row_lines.clear();
2454            }
2455
2456            builder.start_node(SyntaxKind::BLANK_LINE.into());
2457            builder.token(SyntaxKind::BLANK_LINE.into(), line);
2458            builder.finish_node();
2459            continue;
2460        }
2461
2462        // Content line - accumulate for current row
2463        current_row_lines.push(line);
2464    }
2465
2466    // Emit any remaining accumulated lines
2467    if !current_row_lines.is_empty() {
2468        let kind = if in_header {
2469            SyntaxKind::TABLE_HEADER
2470        } else {
2471            SyntaxKind::TABLE_ROW
2472        };
2473        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2474    }
2475
2476    // Emit caption after if present
2477    if let Some((cap_start, cap_end)) = caption_after {
2478        if cap_start > end_pos {
2479            for line in lines.iter().take(cap_start).skip(end_pos) {
2480                if line.trim().is_empty() {
2481                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2482                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2483                    builder.finish_node();
2484                }
2485            }
2486        }
2487        emit_table_caption(builder, lines, cap_start, cap_end, config);
2488    }
2489
2490    builder.finish_node(); // MultilineTable
2491
2492    // Calculate lines consumed
2493    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2494    let table_end = if let Some((_, cap_end)) = caption_after {
2495        cap_end
2496    } else {
2497        end_pos
2498    };
2499
2500    Some(table_end - table_start)
2501}
2502
2503/// Extract cell contents from first line only (for CST emission).
2504/// Multi-line content will be in continuation TEXT tokens.
2505fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2506    let (line_content, _) = strip_newline(line);
2507    let mut cells = Vec::new();
2508
2509    for column in columns.iter() {
2510        let column_start = column_offset_to_byte_index(line_content, column.start);
2511        let column_end = column_offset_to_byte_index(line_content, column.end);
2512
2513        // Extract FULL text for this column (including whitespace)
2514        let cell_text = if column_start < column_end {
2515            &line_content[column_start..column_end]
2516        } else if column_start < line_content.len() {
2517            &line_content[column_start..]
2518        } else {
2519            ""
2520        };
2521
2522        cells.push(cell_text.to_string());
2523    }
2524
2525    cells
2526}
2527
2528/// Emit a multiline table row with inline parsing (Phase 7.1).
2529fn emit_multiline_table_row(
2530    builder: &mut GreenNodeBuilder<'static>,
2531    lines: &[&str],
2532    columns: &[Column],
2533    kind: SyntaxKind,
2534    config: &ParserOptions,
2535) {
2536    if lines.is_empty() {
2537        return;
2538    }
2539
2540    // Extract cell contents from first line only (for CST losslessness)
2541    let first_line = lines[0];
2542    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2543
2544    builder.start_node(kind.into());
2545
2546    // Emit first line with TABLE_CELL nodes
2547    let (trimmed, newline_str) = strip_newline(first_line);
2548    let mut current_pos = 0;
2549
2550    for (col_idx, column) in columns.iter().enumerate() {
2551        let cell_text = &cell_contents[col_idx];
2552        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2553        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2554
2555        // Emit whitespace before cell
2556        if current_pos < cell_start {
2557            builder.token(
2558                SyntaxKind::WHITESPACE.into(),
2559                &trimmed[current_pos..cell_start],
2560            );
2561        }
2562
2563        // Emit cell with inline parsing (first line content only)
2564        emit_table_cell(builder, cell_text, config);
2565
2566        current_pos = cell_end;
2567    }
2568
2569    // Emit trailing whitespace
2570    if current_pos < trimmed.len() {
2571        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2572    }
2573
2574    // Emit newline
2575    if !newline_str.is_empty() {
2576        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2577    }
2578
2579    // Emit continuation lines as TEXT to preserve exact line structure
2580    for line in lines.iter().skip(1) {
2581        emit_line_tokens(builder, line);
2582    }
2583
2584    builder.finish_node();
2585}
2586
#[cfg(test)]
mod multiline_table_tests {
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Runs `try_parse_multiline_table` at line index 0 of `lines`, using a
    /// throwaway builder and default parser options.
    ///
    /// The parser's return value is handed back untouched; the tests below read
    /// `Some(n)` as "a table was recognised and spans `n` input lines".
    fn parse_from_first_line(lines: &[&str]) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_multiline_table(lines, 0, &mut builder, &ParserOptions::default())
    }

    /// Feeds `text` through `emit_table_cell` with default parser options and
    /// returns the root of the finished tree.
    fn cell_root(text: &str) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        emit_table_cell(&mut builder, text, &ParserOptions::default());
        SyntaxNode::new_root(builder.finish())
    }

    #[test]
    fn test_multiline_separator_detection() {
        // Unbroken dash runs of three or more, optionally indented, are accepted.
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----", // with leading spaces
        ];
        for line in accepted {
            assert!(
                try_parse_multiline_separator(line).is_some(),
                "expected a multiline separator: {:?}",
                line
            );
        }

        let rejected = [
            "--",      // too short
            "--- ---", // has spaces
            "+---+",   // grid separator
        ];
        for line in rejected {
            assert!(
                try_parse_multiline_separator(line).is_none(),
                "expected no multiline separator: {:?}",
                line
            );
        }
    }

    #[test]
    fn test_basic_multiline_table() {
        // Two-line header, one wrapped body row and one single-line body row.
        let lines = [
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ];

        // Nine of the ten lines belong to the table; the trailing blank does not.
        assert_eq!(parse_from_first_line(&lines), Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        // No header: the table opens and closes with the column separator.
        let lines = [
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ];

        assert_eq!(parse_from_first_line(&lines), Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        // A headerless table whose only row fits on one line must be rejected here.
        let lines = [
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ];

        assert_eq!(parse_from_first_line(&lines), None);
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        // One row wrapped over two lines, with no blank line before the closing rule.
        let lines = [
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ];

        assert_eq!(parse_from_first_line(&lines), Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let lines = [
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ];

        // Six table lines, then the blank line, then the caption line.
        assert_eq!(parse_from_first_line(&lines), Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let lines = [
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ];

        assert_eq!(parse_from_first_line(&lines), Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        // A spaced-out rule followed by ordinary prose, with a `---` rule much
        // further down: none of this may be read as a headerless table.
        let lines = [
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ];

        assert_eq!(parse_from_first_line(&lines), None);
    }

    #[test]
    fn test_not_multiline_table() {
        // A simple table starts with its header text rather than a dashed
        // line, so the multiline parser has to turn it down.
        let lines = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ];

        assert_eq!(parse_from_first_line(&lines), None);
    }

    // Phase 7.1: unit tests for the emit_table_cell() helper.

    #[test]
    fn test_emit_table_cell_plain_text() {
        let cell = cell_root("Cell");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "Cell");

        // Plain text becomes exactly one TEXT element.
        let kinds: Vec<_> = cell.children_with_tokens().map(|el| el.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::TEXT]);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        let cell = cell_root("*italic*");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "*italic*");

        // The only child node is EMPHASIS.
        let kinds: Vec<_> = cell.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::EMPHASIS]);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        let cell = cell_root("`code`");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "`code`");

        // The only child node is INLINE_CODE.
        let kinds: Vec<_> = cell.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::INLINE_CODE]);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        let cell = cell_root("[text](url)");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "[text](url)");

        // The only child node is LINK.
        let kinds: Vec<_> = cell.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::LINK]);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        let cell = cell_root("**bold**");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "**bold**");

        // The only child node is STRONG.
        let kinds: Vec<_> = cell.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::STRONG]);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let cell = cell_root("Text **bold** and `code`");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "Text **bold** and `code`");

        // At least four elements are expected (TEXT, STRONG, TEXT, INLINE_CODE);
        // only the leading two kinds are pinned down.
        let kinds: Vec<_> = cell.children_with_tokens().map(|el| el.kind()).collect();
        assert!(kinds.len() >= 4);
        assert_eq!(kinds[..2], [SyntaxKind::TEXT, SyntaxKind::STRONG]);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let cell = cell_root("");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "");

        // An empty cell carries no child nodes or tokens at all.
        assert_eq!(cell.children_with_tokens().count(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let cell = cell_root(r"A \| B");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        // The backslash-escaped pipe must round-trip unchanged.
        assert_eq!(cell.text(), r"A \| B");
    }
}