Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
9use crate::parser::utils::inline_emission;
10
/// Horizontal alignment of a table column.
///
/// For simple tables the value is derived from how the header text sits
/// relative to the dash run of the separator line; for pipe tables it is
/// derived from the colons in the separator cells.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Left-aligned (pipe tables: `:---`).
    Left,
    /// Right-aligned (pipe tables: `---:`).
    Right,
    /// Centered (pipe tables: `:---:`).
    Center,
    /// No explicit alignment (pipe tables: `---`).
    Default,
}
18
/// Column information extracted from the separator line.
///
/// `start..end` is the half-open span covered by one run of dashes in the
/// separator line, measured from the beginning of that line (leading
/// indentation included).
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index) in the line
    start: usize,
    /// End position (byte index) in the line, exclusive
    end: usize,
    /// Column alignment
    alignment: Alignment,
}
29
30/// Try to detect if a line is a table separator line.
31/// Returns Some(column positions) if it's a valid separator.
32pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
33    let trimmed = line.trim_start();
34    // Strip trailing newline if present (CRLF or LF)
35    let (trimmed, newline_str) = strip_newline(trimmed);
36    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
37
38    // Must have leading spaces <= 3 to not be a code block
39    if leading_spaces > 3 {
40        return None;
41    }
42
43    // Simple tables only use dashed separators.
44    if trimmed.contains('*') || trimmed.contains('_') {
45        return None;
46    }
47
48    // Must contain at least one dash
49    if !trimmed.contains('-') {
50        return None;
51    }
52
53    // A separator line consists of dashes and spaces
54    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
55        return None;
56    }
57
58    // Must not be a horizontal rule.
59    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
60    if dash_groups.len() <= 1 {
61        return None;
62    }
63
64    // Extract column positions from dash groups
65    let columns = extract_columns(trimmed, leading_spaces);
66
67    if columns.is_empty() {
68        return None;
69    }
70
71    Some(columns)
72}
73
74/// Extract column positions from a separator line.
75fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
76    let mut columns = Vec::new();
77    let mut in_dashes = false;
78    let mut col_start = 0;
79
80    for (i, ch) in separator.char_indices() {
81        match ch {
82            '-' if !in_dashes => {
83                col_start = i + offset;
84                in_dashes = true;
85            }
86            ' ' if in_dashes => {
87                columns.push(Column {
88                    start: col_start,
89                    end: i + offset,
90                    alignment: Alignment::Default, // Will be determined later
91                });
92                in_dashes = false;
93            }
94            _ => {}
95        }
96    }
97
98    // Handle last column
99    if in_dashes {
100        columns.push(Column {
101            start: col_start,
102            end: separator.len() + offset,
103            alignment: Alignment::Default,
104        });
105    }
106
107    columns
108}
109
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Column boundaries are taken from ASCII separator lines, where character
/// and byte offsets coincide. Header and data rows may contain multibyte
/// characters, so offsets have to be translated before slicing. Offsets at or
/// past the end of the line map to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
120
/// Recognise a table-caption marker at the start of `line`.
///
/// Accepted markers are `Table:`, `table:` and a bare `:`; the bare form must
/// be followed by whitespace so that constructs such as the fenced-div fence
/// `:::` are not mistaken for captions. Lines indented by more than three
/// characters are rejected (they would be code blocks).
///
/// Returns `Some((prefix_len, rest))`, where `prefix_len` counts the
/// indentation plus the marker and `rest` is everything after the marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    // Keyword markers need no particular character after the colon.
    for keyword in ["Table:", "table:"] {
        if let Some(tail) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), tail));
        }
    }

    // Bare `:` marker (Pandoc-style): only valid when whitespace follows.
    let tail = body.strip_prefix(':')?;
    tail.starts_with(char::is_whitespace)
        .then_some((indent + 1, tail))
}
149
150/// Check if a line could be the start of a table caption.
151fn is_table_caption_start(line: &str) -> bool {
152    try_parse_caption_prefix(line).is_some()
153}
154
/// Report whether `line`, ignoring leading whitespace, starts with exactly
/// one colon, i.e. `:` not immediately followed by another `:`.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut head = line.trim_start().chars();
    head.next() == Some(':') && head.next() != Some(':')
}
159
160fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
161    let Some((_, rest)) = try_parse_caption_prefix(line) else {
162        return false;
163    };
164    let trimmed = rest.trim_start();
165    trimmed.starts_with("```") || trimmed.starts_with("~~~")
166}
167
/// Report whether `line` is a fenced-div fence: optional indentation, three
/// or more colons, and then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    let colon_count = body.len() - after_colons.len();

    colon_count >= 3
        && after_colons
            .chars()
            .next()
            .map_or(true, char::is_whitespace)
}
177
178fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
179    if !is_table_caption_start(lines[pos]) {
180        return false;
181    }
182
183    if is_bare_colon_caption_start(lines[pos])
184        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
185    {
186        return false;
187    }
188
189    // Avoid stealing definition-list definitions (":   ...") as table captions.
190    if is_bare_colon_caption_start(lines[pos])
191        && pos > 0
192        && !lines[pos - 1].trim().is_empty()
193        && !line_is_fenced_div_fence(lines[pos - 1])
194    {
195        return false;
196    }
197    true
198}
199
200/// Check if a line could be the start of a grid table.
201/// Grid tables start with a separator line like +---+---+ or +===+===+
202fn is_grid_table_start(line: &str) -> bool {
203    try_parse_grid_separator(line).is_some()
204}
205
206/// Check if a line could be the start of a multiline table.
207/// Multiline tables start with either:
208/// - A full-width dash separator (----)
209/// - A column separator with dashes and spaces (---- ---- ----)
210fn is_multiline_table_start(line: &str) -> bool {
211    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
212}
213
214/// Check if there's a table following a potential caption at this position.
215/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
216pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
217    if caption_pos >= lines.len() {
218        return false;
219    }
220
221    // Caption must start with a caption prefix
222    if !is_valid_caption_start_before_table(lines, caption_pos) {
223        return false;
224    }
225
226    let mut pos = caption_pos + 1;
227
228    // Skip continuation lines of caption (non-blank lines)
229    while pos < lines.len() && !lines[pos].trim().is_empty() {
230        // If we hit a table separator, we found a table
231        if try_parse_table_separator(lines[pos]).is_some() {
232            return true;
233        }
234        pos += 1;
235    }
236
237    // Skip one blank line
238    if pos < lines.len() && lines[pos].trim().is_empty() {
239        pos += 1;
240    }
241
242    // Check for table at next position
243    if pos < lines.len() {
244        let line = lines[pos];
245
246        // Check for grid table start (+---+---+ or +===+===+)
247        if is_grid_table_start(line) {
248            return true;
249        }
250
251        // Check for multiline table start (---- or ---- ---- ----)
252        if is_multiline_table_start(line) {
253            return true;
254        }
255
256        // Could be a separator line (simple/pipe table, headerless)
257        if try_parse_table_separator(line).is_some() {
258            return true;
259        }
260
261        // Or could be a header line followed by separator (simple/pipe table with header)
262        if pos + 1 < lines.len() && !line.trim().is_empty() {
263            let next_line = lines[pos + 1];
264            if try_parse_table_separator(next_line).is_some()
265                || try_parse_pipe_separator(next_line).is_some()
266            {
267                return true;
268            }
269        }
270    }
271
272    false
273}
274
275fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
276    if start >= lines.len() || !is_table_caption_start(lines[start]) {
277        return None;
278    }
279    let mut end = start + 1;
280    while end < lines.len() && !lines[end].trim().is_empty() {
281        end += 1;
282    }
283    Some((start, end))
284}
285
286/// Find caption before table (if any).
287/// Returns (caption_start, caption_end) positions, or None.
288fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
289    if table_start == 0 {
290        return None;
291    }
292
293    // Look backward for a caption
294    // Caption must be immediately before table (with possible blank line between)
295    let mut pos = table_start - 1;
296
297    // Skip one blank line if present
298    if lines[pos].trim().is_empty() {
299        if pos == 0 {
300            return None;
301        }
302        pos -= 1;
303    }
304
305    // Now pos points to the last non-blank line before the table
306    // This could be the last line of a multiline caption, or a single-line caption
307    let caption_end = pos + 1; // End is exclusive
308
309    // If this line is NOT a caption start, it might be a continuation line
310    // Scan backward through non-blank lines to find the caption start
311    if !is_valid_caption_start_before_table(lines, pos) {
312        // Not a caption start - check if there's a caption start above
313        let mut scan_pos = pos;
314        while scan_pos > 0 {
315            scan_pos -= 1;
316            let line = lines[scan_pos];
317
318            // If we hit a blank line, we've gone too far
319            if line.trim().is_empty() {
320                return None;
321            }
322
323            // If we find a caption start, this is the beginning of the multiline caption
324            if is_valid_caption_start_before_table(lines, scan_pos) {
325                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
326                    return None;
327                }
328                if previous_nonblank_looks_like_table(lines, scan_pos) {
329                    return None;
330                }
331                return Some((scan_pos, caption_end));
332            }
333        }
334        // Scanned to beginning without finding caption start
335        None
336    } else {
337        if pos > 0 && !lines[pos - 1].trim().is_empty() {
338            return None;
339        }
340        if previous_nonblank_looks_like_table(lines, pos) {
341            return None;
342        }
343        // This line is a caption start - return the range
344        Some((pos, caption_end))
345    }
346}
347
348fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
349    if pos == 0 {
350        return false;
351    }
352    let mut i = pos;
353    while i > 0 {
354        i -= 1;
355        let line = lines[i].trim();
356        if line.is_empty() {
357            continue;
358        }
359        return line_looks_like_table_syntax(line);
360    }
361    false
362}
363
364fn line_looks_like_table_syntax(line: &str) -> bool {
365    if line.starts_with('|') && line.matches('|').count() >= 2 {
366        return true;
367    }
368    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
369        return true;
370    }
371    try_parse_table_separator(line).is_some()
372        || try_parse_pipe_separator(line).is_some()
373        || try_parse_grid_separator(line).is_some()
374}
375
376/// Find caption after table (if any).
377/// Returns (caption_start, caption_end) positions, or None.
378fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
379    if table_end >= lines.len() {
380        return None;
381    }
382
383    let mut pos = table_end;
384
385    // Skip one blank line if present
386    if pos < lines.len() && lines[pos].trim().is_empty() {
387        pos += 1;
388    }
389
390    if pos >= lines.len() {
391        return None;
392    }
393
394    // Check if this line is a caption
395    if is_table_caption_start(lines[pos]) {
396        let caption_start = pos;
397        // Find end of caption (continues until blank line)
398        let mut caption_end = caption_start + 1;
399        while caption_end < lines.len() && !lines[caption_end].trim().is_empty() {
400            caption_end += 1;
401        }
402        Some((caption_start, caption_end))
403    } else {
404        None
405    }
406}
407
408/// Emit a table caption node.
409fn emit_table_caption(
410    builder: &mut GreenNodeBuilder<'static>,
411    lines: &[&str],
412    start: usize,
413    end: usize,
414    config: &ParserOptions,
415) {
416    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
417
418    for (i, line) in lines[start..end].iter().enumerate() {
419        if i == 0 {
420            // First line - parse and emit prefix separately
421            let trimmed = line.trim_start();
422            let leading_ws_len = line.len() - trimmed.len();
423
424            // Emit leading whitespace if present
425            if leading_ws_len > 0 {
426                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
427            }
428
429            // Check for caption prefix and emit separately
430            // Calculate where the prefix ends (after trimmed content)
431            let prefix_and_rest = if line.ends_with('\n') {
432                &line[leading_ws_len..line.len() - 1] // Exclude newline
433            } else {
434                &line[leading_ws_len..]
435            };
436
437            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
438                (7, "Table: ")
439            } else if prefix_and_rest.starts_with("table: ") {
440                (7, "table: ")
441            } else if prefix_and_rest.starts_with(": ") {
442                (2, ": ")
443            } else if prefix_and_rest.starts_with(':') {
444                (1, ":")
445            } else {
446                (0, "")
447            };
448
449            if prefix_len > 0 {
450                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
451
452                // Emit rest of line after prefix
453                let rest_start = leading_ws_len + prefix_len;
454                if rest_start < line.len() {
455                    // Get the caption text (excluding newline)
456                    let (caption_text, newline_str) = strip_newline(&line[rest_start..]);
457
458                    if !caption_text.is_empty() {
459                        inline_emission::emit_inlines(builder, caption_text, config);
460                    }
461
462                    if !newline_str.is_empty() {
463                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
464                    }
465                }
466            } else {
467                // No recognized prefix, emit whole trimmed line
468                let (text, newline_str) = strip_newline(&line[leading_ws_len..]);
469
470                if !text.is_empty() {
471                    inline_emission::emit_inlines(builder, text, config);
472                }
473
474                if !newline_str.is_empty() {
475                    builder.token(SyntaxKind::NEWLINE.into(), newline_str);
476                }
477            }
478        } else {
479            // Continuation lines - emit with inline parsing
480            let (text, newline_str) = strip_newline(line);
481
482            if !text.is_empty() {
483                inline_emission::emit_inlines(builder, text, config);
484            }
485
486            if !newline_str.is_empty() {
487                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
488            }
489        }
490    }
491
492    builder.finish_node(); // TABLE_CAPTION
493}
494
495/// Emit a table cell with inline content parsing.
496/// This is the core helper for Phase 7.1 table inline parsing migration.
497fn emit_table_cell(
498    builder: &mut GreenNodeBuilder<'static>,
499    cell_text: &str,
500    config: &ParserOptions,
501) {
502    builder.start_node(SyntaxKind::TABLE_CELL.into());
503
504    // Parse inline content within the cell
505    if !cell_text.is_empty() {
506        inline_emission::emit_inlines(builder, cell_text, config);
507    }
508
509    builder.finish_node(); // TABLE_CELL
510}
511
512/// Determine column alignments based on separator and optional header.
513fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
514    for col in columns.iter_mut() {
515        let sep_slice = &separator_line[col.start..col.end];
516
517        if let Some(header) = header_line {
518            let header_start = column_offset_to_byte_index(header, col.start);
519            let header_end = column_offset_to_byte_index(header, col.end);
520
521            // Extract header text for this column
522            let header_text = if header_start < header_end {
523                header[header_start..header_end].trim()
524            } else if header_start < header.len() {
525                header[header_start..].trim()
526            } else {
527                ""
528            };
529
530            if header_text.is_empty() {
531                col.alignment = Alignment::Default;
532                continue;
533            }
534
535            // Find where the header text starts and ends within the column
536            let header_in_col = &header[header_start..header_end];
537            let text_start = header_in_col.len() - header_in_col.trim_start().len();
538            let text_end = header_in_col.trim_end().len() + text_start;
539
540            // Check dash alignment relative to text
541            let dashes_start = 0; // Dashes start at beginning of sep_slice
542            let dashes_end = sep_slice.len();
543
544            let flush_left = dashes_start == text_start;
545            let flush_right = dashes_end == text_end;
546
547            col.alignment = match (flush_left, flush_right) {
548                (true, true) => Alignment::Default,
549                (true, false) => Alignment::Left,
550                (false, true) => Alignment::Right,
551                (false, false) => Alignment::Center,
552            };
553        } else {
554            // Without header, alignment based on first row (we'll handle this later)
555            col.alignment = Alignment::Default;
556        }
557    }
558}
559
560/// Try to parse a simple table starting at the given position.
561/// Returns the number of lines consumed if successful.
562pub(crate) fn try_parse_simple_table(
563    lines: &[&str],
564    start_pos: usize,
565    builder: &mut GreenNodeBuilder<'static>,
566    config: &ParserOptions,
567) -> Option<usize> {
568    log::trace!("try_parse_simple_table at line {}", start_pos + 1);
569
570    if start_pos >= lines.len() {
571        return None;
572    }
573
574    // Look for a separator line
575    let separator_pos = find_separator_line(lines, start_pos)?;
576    log::trace!("  found separator at line {}", separator_pos + 1);
577
578    let separator_line = lines[separator_pos];
579    let mut columns = try_parse_table_separator(separator_line)?;
580
581    // Determine if there's a header (separator not at start)
582    let has_header = separator_pos > start_pos;
583    let header_line = if has_header {
584        Some(lines[separator_pos - 1])
585    } else {
586        None
587    };
588
589    // Determine alignments
590    determine_alignments(&mut columns, separator_line, header_line);
591
592    // Find table end (blank line or end of input)
593    let end_pos = find_table_end(lines, separator_pos + 1);
594
595    // Must have at least one data row (or it's just a separator)
596    let data_rows = end_pos - separator_pos - 1;
597
598    if data_rows == 0 {
599        return None;
600    }
601
602    // Check for caption before table
603    let caption_before = find_caption_before_table(lines, start_pos);
604
605    // Check for caption after table
606    let caption_after = if caption_before.is_some() {
607        None
608    } else {
609        find_caption_after_table(lines, end_pos)
610    };
611
612    // Build the table
613    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
614
615    // Emit caption before if present
616    if let Some((cap_start, cap_end)) = caption_before {
617        emit_table_caption(builder, lines, cap_start, cap_end, config);
618
619        // Emit blank line between caption and table if present
620        if cap_end < start_pos {
621            for line in lines.iter().take(start_pos).skip(cap_end) {
622                if line.trim().is_empty() {
623                    builder.start_node(SyntaxKind::BLANK_LINE.into());
624                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
625                    builder.finish_node();
626                }
627            }
628        }
629    }
630
631    // Emit header if present
632    if has_header {
633        emit_table_row(
634            builder,
635            lines[separator_pos - 1],
636            &columns,
637            SyntaxKind::TABLE_HEADER,
638            config,
639        );
640    }
641
642    // Emit separator
643    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
644    emit_line_tokens(builder, separator_line);
645    builder.finish_node();
646
647    // Emit data rows
648    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
649        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
650    }
651
652    // Emit caption after if present
653    if let Some((cap_start, cap_end)) = caption_after {
654        // Emit blank line before caption if needed
655        if cap_start > end_pos {
656            for line in lines.iter().take(cap_start).skip(end_pos) {
657                if line.trim().is_empty() {
658                    builder.start_node(SyntaxKind::BLANK_LINE.into());
659                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
660                    builder.finish_node();
661                }
662            }
663        }
664        emit_table_caption(builder, lines, cap_start, cap_end, config);
665    }
666
667    builder.finish_node(); // SimpleTable
668
669    // Calculate lines consumed (including captions)
670    let table_start = if let Some((cap_start, _)) = caption_before {
671        cap_start
672    } else if has_header {
673        separator_pos - 1
674    } else {
675        separator_pos
676    };
677
678    let table_end = if let Some((_, cap_end)) = caption_after {
679        cap_end
680    } else {
681        end_pos
682    };
683
684    let lines_consumed = table_end - table_start;
685
686    Some(lines_consumed)
687}
688
689/// Find the position of a separator line starting from pos.
690fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
691    log::trace!("  find_separator_line from line {}", start_pos + 1);
692
693    // Check first line
694    log::trace!("    checking first line: {:?}", lines[start_pos]);
695    if try_parse_table_separator(lines[start_pos]).is_some() {
696        log::trace!("    separator found at first line");
697        return Some(start_pos);
698    }
699
700    // Check second line (for table with header)
701    if start_pos + 1 < lines.len()
702        && !lines[start_pos].trim().is_empty()
703        && try_parse_table_separator(lines[start_pos + 1]).is_some()
704    {
705        return Some(start_pos + 1);
706    }
707    None
708}
709
710/// Find where the table ends (first blank line or end of input).
711fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
712    for i in start_pos..lines.len() {
713        if lines[i].trim().is_empty() {
714            return i;
715        }
716        // Check if this could be a closing separator
717        if try_parse_table_separator(lines[i]).is_some() {
718            // Check if next line is blank or end
719            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
720                return i + 1;
721            }
722        }
723    }
724    lines.len()
725}
726
727/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
728/// Uses column boundaries from the separator line to extract cells.
729fn emit_table_row(
730    builder: &mut GreenNodeBuilder<'static>,
731    line: &str,
732    columns: &[Column],
733    row_kind: SyntaxKind,
734    config: &ParserOptions,
735) {
736    builder.start_node(row_kind.into());
737
738    let (line_without_newline, newline_str) = strip_newline(line);
739
740    // Emit leading whitespace if present
741    let trimmed = line_without_newline.trim_start();
742    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
743    if leading_ws_len > 0 {
744        builder.token(
745            SyntaxKind::WHITESPACE.into(),
746            &line_without_newline[..leading_ws_len],
747        );
748    }
749
750    // Track where we are in the line (for losslessness)
751    let mut current_pos = 0;
752
753    // Extract and emit cells based on column boundaries
754    for col in columns.iter() {
755        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
756        let cell_start = if col.start >= leading_ws_len {
757            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
758        } else {
759            0
760        };
761
762        let cell_end = if col.end >= leading_ws_len {
763            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
764        } else {
765            0
766        };
767
768        // Extract cell text from column bounds. When the column lies entirely
769        // before the trimmed content (col.end <= leading_ws_len) both bounds
770        // clamp to 0; treat that as an empty cell rather than re-emitting the
771        // whole row.
772        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
773            &trimmed[cell_start..cell_end]
774        } else {
775            ""
776        };
777
778        let cell_content = cell_text.trim();
779        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
780
781        // Emit any whitespace from current position to start of cell content
782        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
783        if current_pos < content_abs_pos {
784            builder.token(
785                SyntaxKind::WHITESPACE.into(),
786                &trimmed[current_pos..content_abs_pos],
787            );
788        }
789
790        // Emit cell with inline parsing
791        emit_table_cell(builder, cell_content, config);
792
793        // Update current position to end of cell content
794        current_pos = content_abs_pos + cell_content.len();
795    }
796
797    // Emit any remaining whitespace after last cell
798    if current_pos < trimmed.len() {
799        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
800    }
801
802    // Emit newline if present
803    if !newline_str.is_empty() {
804        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
805    }
806
807    builder.finish_node();
808}
809
810// ============================================================================
811// Pipe Table Parsing
812// ============================================================================
813
814/// Check if a line is a pipe table separator line.
815/// Returns the column alignments if it's a valid separator.
816fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
817    let trimmed = line.trim();
818
819    // Must contain at least one pipe
820    if !trimmed.contains('|') && !trimmed.contains('+') {
821        return None;
822    }
823
824    // Split by pipes (or + for orgtbl variant)
825    let cells: Vec<&str> = if trimmed.contains('+') {
826        // Orgtbl variant: use + as separator in separator line
827        trimmed.split(['|', '+']).collect()
828    } else {
829        trimmed.split('|').collect()
830    };
831
832    let mut alignments = Vec::new();
833
834    for cell in cells {
835        let cell = cell.trim();
836
837        // Skip empty cells (from leading/trailing pipes)
838        if cell.is_empty() {
839            continue;
840        }
841
842        // Must be dashes with optional colons
843        let starts_colon = cell.starts_with(':');
844        let ends_colon = cell.ends_with(':');
845
846        // Remove colons to check if rest is all dashes
847        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
848
849        // Must have at least one dash
850        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
851            return None;
852        }
853
854        // Determine alignment from colon positions
855        let alignment = match (starts_colon, ends_colon) {
856            (true, true) => Alignment::Center,
857            (true, false) => Alignment::Left,
858            (false, true) => Alignment::Right,
859            (false, false) => Alignment::Default,
860        };
861
862        alignments.push(alignment);
863    }
864
865    // Must have at least one column
866    if alignments.is_empty() {
867        None
868    } else {
869        Some(alignments)
870    }
871}
872
/// Split a pipe-table row into its trimmed cell texts.
///
/// A pipe preceded by a backslash (`\|`) is kept verbatim inside the cell
/// instead of acting as a delimiter. A leading pipe does not open an empty
/// cell, and an empty final cell (as left by a trailing pipe) is dropped.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let row = line.trim();

    let mut cells: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut iter = row.chars().enumerate().peekable();

    while let Some((idx, c)) = iter.next() {
        if c == '\\' {
            // Keep the backslash; if it escapes a pipe, swallow the pipe too
            // so it is not taken for a delimiter.
            buf.push('\\');
            if iter.next_if(|&(_, next)| next == '|').is_some() {
                buf.push('|');
            }
        } else if c == '|' {
            // The very first character being a pipe is just the row's
            // opening delimiter.
            if idx > 0 {
                cells.push(buf.trim().to_string());
                buf.clear();
            }
        } else {
            buf.push(c);
        }
    }

    // Text after the last pipe forms a final cell unless it is empty.
    let tail = buf.trim();
    if !tail.is_empty() {
        cells.push(tail.to_string());
    }

    cells
}
920
921/// Emit a pipe table row with inline-parsed cells.
922/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
923fn emit_pipe_table_row(
924    builder: &mut GreenNodeBuilder<'static>,
925    line: &str,
926    row_kind: SyntaxKind,
927    config: &ParserOptions,
928) {
929    builder.start_node(row_kind.into());
930
931    let (line_without_newline, newline_str) = strip_newline(line);
932    let trimmed = line_without_newline.trim();
933
934    // Parse cell boundaries
935    let mut cell_starts = Vec::new();
936    let mut cell_ends = Vec::new();
937    let mut in_escape = false;
938
939    // Find all pipe positions (excluding escaped ones)
940    let mut pipe_positions = Vec::new();
941    for (i, ch) in trimmed.char_indices() {
942        if in_escape {
943            in_escape = false;
944            continue;
945        }
946        if ch == '\\' {
947            in_escape = true;
948            continue;
949        }
950        if ch == '|' {
951            pipe_positions.push(i);
952        }
953    }
954
955    // Determine cell boundaries based on pipe positions
956    if pipe_positions.is_empty() {
957        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
958        cell_starts.push(0);
959        cell_ends.push(trimmed.len());
960    } else {
961        // Check if line starts with pipe
962        let start_pipe = pipe_positions.first() == Some(&0);
963        // Check if line ends with pipe
964        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
965
966        if start_pipe {
967            // Skip first pipe
968            for i in 1..pipe_positions.len() {
969                cell_starts.push(pipe_positions[i - 1] + 1);
970                cell_ends.push(pipe_positions[i]);
971            }
972            // Add last cell if there's no trailing pipe
973            if !end_pipe {
974                cell_starts.push(*pipe_positions.last().unwrap() + 1);
975                cell_ends.push(trimmed.len());
976            }
977        } else {
978            // No leading pipe
979            cell_starts.push(0);
980            cell_ends.push(pipe_positions[0]);
981
982            for i in 1..pipe_positions.len() {
983                cell_starts.push(pipe_positions[i - 1] + 1);
984                cell_ends.push(pipe_positions[i]);
985            }
986
987            // Add last cell if there's no trailing pipe
988            if !end_pipe {
989                cell_starts.push(*pipe_positions.last().unwrap() + 1);
990                cell_ends.push(trimmed.len());
991            }
992        }
993    }
994
995    // Emit leading whitespace if present (before trim)
996    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
997    if leading_ws_len > 0 {
998        builder.token(
999            SyntaxKind::WHITESPACE.into(),
1000            &line_without_newline[..leading_ws_len],
1001        );
1002    }
1003
1004    // Emit cells with pipes
1005    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1006        // Emit pipe before cell (except for first cell if no leading pipe)
1007        if *start > 0 {
1008            builder.token(SyntaxKind::TEXT.into(), "|");
1009        } else if idx == 0 && trimmed.starts_with('|') {
1010            // Leading pipe
1011            builder.token(SyntaxKind::TEXT.into(), "|");
1012        }
1013
1014        // Get cell content with its whitespace
1015        let cell_with_ws = &trimmed[*start..*end];
1016        let cell_content = cell_with_ws.trim();
1017
1018        // Emit leading whitespace within cell
1019        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1020        if !cell_leading_ws.is_empty() {
1021            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1022        }
1023
1024        // Emit cell with inline parsing
1025        emit_table_cell(builder, cell_content, config);
1026
1027        // Emit trailing whitespace within cell
1028        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1029        if cell_trailing_ws_start < cell_with_ws.len() {
1030            builder.token(
1031                SyntaxKind::WHITESPACE.into(),
1032                &cell_with_ws[cell_trailing_ws_start..],
1033            );
1034        }
1035    }
1036
1037    // Emit trailing pipe if present
1038    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1039        builder.token(SyntaxKind::TEXT.into(), "|");
1040    }
1041
1042    // Emit trailing whitespace after trim (before newline)
1043    let trailing_ws_start = leading_ws_len + trimmed.len();
1044    if trailing_ws_start < line_without_newline.len() {
1045        builder.token(
1046            SyntaxKind::WHITESPACE.into(),
1047            &line_without_newline[trailing_ws_start..],
1048        );
1049    }
1050
1051    // Emit newline
1052    if !newline_str.is_empty() {
1053        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1054    }
1055
1056    builder.finish_node();
1057}
1058
1059/// Try to parse a pipe table starting at the given position.
1060/// Returns the number of lines consumed if successful.
1061pub(crate) fn try_parse_pipe_table(
1062    lines: &[&str],
1063    start_pos: usize,
1064    builder: &mut GreenNodeBuilder<'static>,
1065    config: &ParserOptions,
1066) -> Option<usize> {
1067    if start_pos + 1 >= lines.len() {
1068        return None;
1069    }
1070
1071    // Check if this line is a caption followed by a table
1072    // If so, the actual table starts after the caption and blank line
1073    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1074        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1075        let mut pos = cap_end;
1076        while pos < lines.len() && lines[pos].trim().is_empty() {
1077            pos += 1;
1078        }
1079        (pos, Some((cap_start, cap_end)))
1080    } else {
1081        (start_pos, None)
1082    };
1083
1084    if actual_start + 1 >= lines.len() {
1085        return None;
1086    }
1087
1088    // First line should have pipes (potential header)
1089    let header_line = lines[actual_start];
1090    if !header_line.contains('|') {
1091        return None;
1092    }
1093
1094    // Second line should be separator
1095    let separator_line = lines[actual_start + 1];
1096    let alignments = try_parse_pipe_separator(separator_line)?;
1097
1098    // Parse header cells
1099    let header_cells = parse_pipe_table_row(header_line);
1100
1101    // Number of columns should match (approximately - be lenient)
1102    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1103        // Only fail if very different
1104        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1105            return None;
1106        }
1107    }
1108
1109    // Find table end (first blank line or end of input)
1110    let mut end_pos = actual_start + 2;
1111    while end_pos < lines.len() {
1112        let line = lines[end_pos];
1113        if line.trim().is_empty() {
1114            break;
1115        }
1116        // Row should have pipes
1117        if !line.contains('|') {
1118            break;
1119        }
1120        end_pos += 1;
1121    }
1122
1123    // Must have at least one data row
1124    if end_pos <= actual_start + 2 {
1125        return None;
1126    }
1127
1128    // Check for caption before table (only if we didn't already detect it)
1129    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1130
1131    // Check for caption after table
1132    let caption_after = if caption_before.is_some() {
1133        None
1134    } else {
1135        find_caption_after_table(lines, end_pos)
1136    };
1137
1138    // Build the pipe table
1139    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1140
1141    // Emit caption before if present
1142    if let Some((cap_start, cap_end)) = caption_before {
1143        emit_table_caption(builder, lines, cap_start, cap_end, config);
1144        // Emit blank line between caption and table if present
1145        if cap_end < actual_start {
1146            for line in lines.iter().take(actual_start).skip(cap_end) {
1147                if line.trim().is_empty() {
1148                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1149                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1150                    builder.finish_node();
1151                }
1152            }
1153        }
1154    }
1155
1156    // Emit header row with inline-parsed cells
1157    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1158
1159    // Emit separator
1160    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1161    emit_line_tokens(builder, separator_line);
1162    builder.finish_node();
1163
1164    // Emit data rows with inline-parsed cells
1165    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1166        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1167    }
1168
1169    // Emit caption after if present
1170    if let Some((cap_start, cap_end)) = caption_after {
1171        // Emit blank line before caption if needed
1172        if cap_start > end_pos {
1173            for line in lines.iter().take(cap_start).skip(end_pos) {
1174                if line.trim().is_empty() {
1175                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1176                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1177                    builder.finish_node();
1178                }
1179            }
1180        }
1181        emit_table_caption(builder, lines, cap_start, cap_end, config);
1182    }
1183
1184    builder.finish_node(); // PipeTable
1185
1186    // Calculate lines consumed
1187    let table_start = caption_before
1188        .map(|(start, _)| start)
1189        .unwrap_or(actual_start);
1190    let table_end = if let Some((_, cap_end)) = caption_after {
1191        cap_end
1192    } else {
1193        end_pos
1194    };
1195
1196    Some(table_end - table_start)
1197}
1198
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the simple-table parser on `input` starting at line `start` with
    /// default options and return the number of lines it consumed.
    fn simple_table_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_simple_table(input, start, &mut builder, &ParserOptions::default())
    }

    /// Run the pipe-table parser on `input` starting at line `start` with
    /// default options and return the number of lines it consumed.
    fn pipe_table_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_pipe_table(input, start, &mut builder, &ParserOptions::default())
    }

    // Simple table tests

    #[test]
    fn test_separator_detection() {
        let separators = [
            "------- ------ ----------   -------",
            "  ---  ---  ---",
            "--- --- ---", // table separator
        ];
        for line in separators {
            assert!(try_parse_table_separator(line).is_some());
        }
        // A single dash group is a horizontal rule, not a table separator.
        assert!(try_parse_table_separator("-------").is_none());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(simple_table_lines(&input, 0), Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // sep + 2 rows
        assert_eq!(simple_table_lines(&input, 0), Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(try_parse_caption_prefix(caption).is_some());
        }
        // Just a colon (no content) and ordinary text are not captions.
        for not_caption in [":", "Not a caption"] {
            assert!(try_parse_caption_prefix(not_caption).is_none());
        }
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let tree = crate::parse("Term\n: ```\n  code\n  ```\n", None);
        let has_kind = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            has_kind(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            has_kind(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !has_kind(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
        let tree = crate::parse(input, None);
        let count_kind =
            |kind: SyntaxKind| tree.descendants().filter(|node| node.kind() == kind).count();

        assert_eq!(
            count_kind(SyntaxKind::TABLE_CAPTION),
            2,
            "expected both captions to attach to tables"
        );
        assert!(
            count_kind(SyntaxKind::DEFINITION_LIST) == 0,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(simple_table_lines(&input, 0), Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let input = [
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // Parsing starts at the header line (index 2).
        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(simple_table_lines(&input, 2), Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(simple_table_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];

        // Should consume through end of multi-line caption
        assert_eq!(simple_table_lines(&input, 0), Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let input = [
            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
            "--------------  ------------ ------- ---------------- ----------------- ------------",
            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
            "",
        ];

        assert_eq!(simple_table_lines(&input, 0), Some(4));
    }

    // Pipe table tests

    #[test]
    fn test_pipe_separator_detection() {
        let separators = [
            "|------:|:-----|---------|:------:|",
            "|---|---|",
            "-----|-----:",     // No leading pipe
            "|-----+-------|", // Orgtbl variant
        ];
        for line in separators {
            assert!(try_parse_pipe_separator(line).is_some());
        }
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        let expected = [
            Alignment::Right,
            Alignment::Left,
            Alignment::Default,
            Alignment::Center,
        ];

        assert_eq!(aligns.len(), 4);
        for (idx, want) in expected.into_iter().enumerate() {
            assert_eq!(aligns[idx], want);
        }
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let cells = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(cells.len(), 3);
        assert_eq!(cells[0], "Right");
        assert_eq!(cells[1], "Left");
        assert_eq!(cells[2], "Center");

        // Without leading/trailing pipes
        assert_eq!(parse_pipe_table_row("Right | Left | Center").len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let input = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(pipe_table_lines(&input, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let input = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        assert_eq!(pipe_table_lines(&input, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let input = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(pipe_table_lines(&input, 1), Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let input = [
            ": (#tab:base) base R quoting",
            "functions",
            "",
            "| C | D |",
            "|---|---|",
            "| 3 | 4 |",
            "",
        ];

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(pipe_table_lines(&input, 0), Some(6));
    }
}
1507
1508// ============================================================================
1509// Grid Table Parsing
1510// ============================================================================
1511
/// Recognise a grid table border line such as `+-----+:====:+`.
///
/// The line may be indented by at most three characters of leading
/// whitespace and, ignoring trailing whitespace, must begin and end with `+`.
/// Each non-empty segment between two `+` signs must consist of a single
/// repeated fill character (`-` or `=`), optionally wrapped in `:` alignment
/// markers. Empty segments are ignored.
///
/// Returns one [`GridColumn`] per non-empty segment, or `None` when the line
/// is not a valid border or contains no columns.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    let after_indent = line.trim_start();

    // More than three leading whitespace characters would make this a code block.
    if line.len() - after_indent.len() > 3 {
        return None;
    }

    let border = after_indent.trim_end();
    if !(border.starts_with('+') && border.ends_with('+')) {
        return None;
    }

    // Splitting on `+` yields an empty piece before the first `+`, the column
    // segments, and an empty piece after the last `+`.
    let pieces: Vec<&str> = border.split('+').collect();
    if pieces.len() < 3 {
        return None;
    }

    let columns = pieces[1..pieces.len() - 1]
        .iter()
        .filter(|segment| !segment.is_empty())
        .map(|segment| {
            // What remains after dropping the alignment colons must be a
            // non-empty run of one fill character.
            let fill_run = segment.trim_start_matches(':').trim_end_matches(':');
            let fill = fill_run.chars().next()?;
            if !matches!(fill, '-' | '=') || fill_run.chars().any(|c| c != fill) {
                return None;
            }
            Some(GridColumn {
                is_header_separator: fill == '=',
                width: segment.chars().count(),
            })
        })
        .collect::<Option<Vec<_>>>()?;

    if columns.is_empty() {
        None
    } else {
        Some(columns)
    }
}

/// One column of a grid table, as described by a border line segment.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the segment, alignment colons included.
    width: usize,
}
1586
1587fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1588    let mut end_byte = start_byte;
1589    let mut display_cols = 0usize;
1590
1591    for (offset, ch) in line[start_byte..].char_indices() {
1592        if ch == '|' {
1593            let sep_byte = start_byte + offset;
1594            return (sep_byte, sep_byte + 1);
1595        }
1596        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1597        if display_cols + ch_width > width {
1598            break;
1599        }
1600        display_cols += ch_width;
1601        end_byte = start_byte + offset + ch.len_utf8();
1602        if display_cols >= width {
1603            break;
1604        }
1605    }
1606
1607    // If the width budget is exhausted before seeing a separator (for example
1608    // because of padding/layout drift), advance to the next literal separator
1609    // to keep row slicing aligned and preserve losslessness.
1610    let mut sep_byte = end_byte;
1611    while sep_byte < line.len() {
1612        let mut chars = line[sep_byte..].chars();
1613        let Some(ch) = chars.next() else {
1614            break;
1615        };
1616        if ch == '|' {
1617            return (sep_byte, sep_byte + 1);
1618        }
1619        sep_byte += ch.len_utf8();
1620    }
1621
1622    (end_byte, end_byte)
1623}
1624
/// Decide whether `line` looks like a grid table content row.
///
/// A content row has at most three characters of leading whitespace, starts
/// with `|`, and (ignoring trailing whitespace) ends with either `|` for an
/// ordinary row or `+` for a spanning-style continuation line.
fn is_grid_content_row(line: &str) -> bool {
    let after_indent = line.trim_start();
    let indent = line.len() - after_indent.len();
    if indent > 3 {
        return false;
    }

    let row = after_indent.trim_end();
    if !row.starts_with('|') {
        return false;
    }
    matches!(row.chars().next_back(), Some('|') | Some('+'))
}
1638
1639/// Extract cell contents from a single grid table row line.
1640/// Returns a vector of cell contents (trimmed) based on column boundaries.
1641/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1642fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1643    let (line_content, _) = strip_newline(line);
1644    let line_trimmed = line_content.trim();
1645
1646    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1647        return vec![String::new(); _columns.len()];
1648    }
1649
1650    let mut cells = Vec::with_capacity(_columns.len());
1651    let mut pos_byte = 1; // Skip leading pipe
1652
1653    for col in _columns {
1654        let col_idx = cells.len();
1655        if pos_byte >= line_trimmed.len() {
1656            cells.push(String::new());
1657            continue;
1658        }
1659
1660        let start_byte = pos_byte;
1661        let end_byte = if col_idx + 1 == _columns.len() {
1662            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1663        } else {
1664            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1665            pos_byte = next_start;
1666            end
1667        };
1668        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1669        if col_idx + 1 == _columns.len() {
1670            pos_byte = line_trimmed.len();
1671        }
1672    }
1673
1674    cells
1675}
1676
1677/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1678/// Concatenates cell contents across lines with newlines, then trims.
1679fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1680    if lines.is_empty() {
1681        return vec![String::new(); columns.len()];
1682    }
1683
1684    extract_grid_cells_from_line(lines[0], columns)
1685}
1686
1687/// Emit a grid table row with inline-parsed cells.
1688/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1689/// then continuation lines as raw TEXT for losslessness.
1690fn emit_grid_table_row(
1691    builder: &mut GreenNodeBuilder<'static>,
1692    lines: &[&str],
1693    columns: &[GridColumn],
1694    row_kind: SyntaxKind,
1695    config: &ParserOptions,
1696) {
1697    if lines.is_empty() {
1698        return;
1699    }
1700
1701    // Extract cell contents from the first line.
1702    let cell_contents = extract_grid_cells_multiline(lines, columns);
1703
1704    builder.start_node(row_kind.into());
1705
1706    // Emit first line with TABLE_CELL nodes
1707    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1708    let first_line = lines[0];
1709    let (line_without_newline, newline_str) = strip_newline(first_line);
1710    let trimmed = line_without_newline.trim();
1711    let expected_pipe_count = columns.len().saturating_add(1);
1712    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1713
1714    // Rows that don't contain all expected column separators (spanning-style rows)
1715    // must be emitted verbatim for losslessness.
1716    if actual_pipe_count != expected_pipe_count {
1717        emit_line_tokens(builder, first_line);
1718        for line in lines.iter().skip(1) {
1719            emit_line_tokens(builder, line);
1720        }
1721        builder.finish_node();
1722        return;
1723    }
1724
1725    // Emit leading whitespace
1726    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1727    if leading_ws_len > 0 {
1728        builder.token(
1729            SyntaxKind::WHITESPACE.into(),
1730            &line_without_newline[..leading_ws_len],
1731        );
1732    }
1733
1734    // Emit leading pipe
1735    if trimmed.starts_with('|') {
1736        builder.token(SyntaxKind::TEXT.into(), "|");
1737    }
1738
1739    // Emit each cell based on fixed column widths from separators
1740    let mut pos_byte = 1usize; // after leading pipe
1741    for (idx, cell_content) in cell_contents.iter().enumerate() {
1742        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1743            let start_byte = pos_byte;
1744            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1745                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1746            } else {
1747                let (end, next_start) =
1748                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1749                pos_byte = next_start;
1750                end
1751            };
1752            let slice = &trimmed[start_byte..end_byte];
1753            if idx + 1 == columns.len() {
1754                pos_byte = trimmed.len();
1755            }
1756            slice
1757        } else {
1758            ""
1759        };
1760
1761        // Emit leading whitespace in cell
1762        let cell_trimmed = part.trim();
1763        let ws_start_len = part.len() - part.trim_start().len();
1764        if ws_start_len > 0 {
1765            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1766        }
1767
1768        // Emit TABLE_CELL with inline parsing
1769        emit_table_cell(builder, cell_content, config);
1770
1771        // Emit trailing whitespace in cell
1772        let ws_end_start = ws_start_len + cell_trimmed.len();
1773        if ws_end_start < part.len() {
1774            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1775        }
1776
1777        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1778        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1779            builder.token(SyntaxKind::TEXT.into(), "|");
1780        }
1781    }
1782
1783    // Emit trailing whitespace before newline
1784    let trailing_ws_start = leading_ws_len + trimmed.len();
1785    if trailing_ws_start < line_without_newline.len() {
1786        builder.token(
1787            SyntaxKind::WHITESPACE.into(),
1788            &line_without_newline[trailing_ws_start..],
1789        );
1790    }
1791
1792    // Emit newline
1793    if !newline_str.is_empty() {
1794        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1795    }
1796
1797    // Emit continuation lines as TEXT for losslessness
1798    for line in lines.iter().skip(1) {
1799        emit_line_tokens(builder, line);
1800    }
1801
1802    builder.finish_node();
1803}
1804
1805/// Try to parse a grid table starting at the given position.
1806/// Returns the number of lines consumed if successful.
1807pub(crate) fn try_parse_grid_table(
1808    lines: &[&str],
1809    start_pos: usize,
1810    builder: &mut GreenNodeBuilder<'static>,
1811    config: &ParserOptions,
1812) -> Option<usize> {
1813    if start_pos >= lines.len() {
1814        return None;
1815    }
1816
1817    // Check if this line is a caption followed by a table
1818    // If so, the actual table starts after the caption and blank line
1819    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1820        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1821        let mut pos = cap_end;
1822        while pos < lines.len() && lines[pos].trim().is_empty() {
1823            pos += 1;
1824        }
1825        (pos, Some((cap_start, cap_end)))
1826    } else {
1827        (start_pos, None)
1828    };
1829
1830    if actual_start >= lines.len() {
1831        return None;
1832    }
1833
1834    // First line must be a grid separator
1835    let first_line = lines[actual_start];
1836    let _columns = try_parse_grid_separator(first_line)?;
1837
1838    // Track table structure
1839    let mut end_pos = actual_start + 1;
1840    let mut found_header_sep = false;
1841    let mut in_footer = false;
1842
1843    // Scan table lines
1844    while end_pos < lines.len() {
1845        let line = lines[end_pos];
1846
1847        // Check for blank line (table ends)
1848        if line.trim().is_empty() {
1849            break;
1850        }
1851
1852        // Check for separator line
1853        if let Some(sep_cols) = try_parse_grid_separator(line) {
1854            // Check if this is a header separator (=)
1855            if sep_cols.iter().any(|c| c.is_header_separator) {
1856                if !found_header_sep {
1857                    found_header_sep = true;
1858                } else if !in_footer {
1859                    // Second = separator starts footer
1860                    in_footer = true;
1861                }
1862            }
1863            end_pos += 1;
1864            continue;
1865        }
1866
1867        // Check for content row
1868        if is_grid_content_row(line) {
1869            end_pos += 1;
1870            continue;
1871        }
1872
1873        // Not a valid grid table line - table ends
1874        break;
1875    }
1876
1877    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1878    // Or just top + content rows that end with a separator
1879    if end_pos <= actual_start + 1 {
1880        return None;
1881    }
1882
1883    // Last consumed line should be a separator for a well-formed table
1884    // But we'll be lenient and accept tables ending with content rows
1885
1886    // Check for caption before table (only if we didn't already detected it)
1887    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1888
1889    // Check for caption after table
1890    let caption_after = if caption_before.is_some() {
1891        None
1892    } else {
1893        find_caption_after_table(lines, end_pos)
1894    };
1895
1896    // Build the grid table
1897    builder.start_node(SyntaxKind::GRID_TABLE.into());
1898
1899    // Emit caption before if present
1900    if let Some((cap_start, cap_end)) = caption_before {
1901        emit_table_caption(builder, lines, cap_start, cap_end, config);
1902        // Emit blank line between caption and table if present
1903        if cap_end < actual_start {
1904            for line in lines.iter().take(actual_start).skip(cap_end) {
1905                if line.trim().is_empty() {
1906                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1907                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1908                    builder.finish_node();
1909                }
1910            }
1911        }
1912    }
1913
1914    // Track whether we've passed the header separator
1915    let mut past_header_sep = false;
1916    let mut in_footer_section = false;
1917    let mut current_row_lines: Vec<&str> = Vec::new();
1918    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1919
1920    // Emit table rows - accumulate multi-line cells
1921    for line in lines.iter().take(end_pos).skip(actual_start) {
1922        if let Some(sep_cols) = try_parse_grid_separator(line) {
1923            // Separator line - emit any accumulated row first
1924            if !current_row_lines.is_empty() {
1925                emit_grid_table_row(
1926                    builder,
1927                    &current_row_lines,
1928                    &sep_cols,
1929                    current_row_kind,
1930                    config,
1931                );
1932                current_row_lines.clear();
1933            }
1934
1935            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1936
1937            if is_header_sep {
1938                if !past_header_sep {
1939                    // This is the header/body separator
1940                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1941                    emit_line_tokens(builder, line);
1942                    builder.finish_node();
1943                    past_header_sep = true;
1944                } else {
1945                    // Footer separator
1946                    if !in_footer_section {
1947                        in_footer_section = true;
1948                    }
1949                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1950                    emit_line_tokens(builder, line);
1951                    builder.finish_node();
1952                }
1953            } else {
1954                // Regular separator (row boundary)
1955                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1956                emit_line_tokens(builder, line);
1957                builder.finish_node();
1958            }
1959        } else if is_grid_content_row(line) {
1960            // Content row - accumulate for multi-line cells
1961            current_row_kind = if !past_header_sep && found_header_sep {
1962                SyntaxKind::TABLE_HEADER
1963            } else if in_footer_section {
1964                SyntaxKind::TABLE_FOOTER
1965            } else {
1966                SyntaxKind::TABLE_ROW
1967            };
1968
1969            current_row_lines.push(line);
1970        }
1971    }
1972
1973    // Emit any remaining accumulated row
1974    if !current_row_lines.is_empty() {
1975        // Use first separator's columns for cell boundaries
1976        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
1977            emit_grid_table_row(
1978                builder,
1979                &current_row_lines,
1980                &sep_cols,
1981                current_row_kind,
1982                config,
1983            );
1984        }
1985    }
1986
1987    // Emit caption after if present
1988    if let Some((cap_start, cap_end)) = caption_after {
1989        if cap_start > end_pos {
1990            for line in lines.iter().take(cap_start).skip(end_pos) {
1991                if line.trim().is_empty() {
1992                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1993                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1994                    builder.finish_node();
1995                }
1996            }
1997        }
1998        emit_table_caption(builder, lines, cap_start, cap_end, config);
1999    }
2000
2001    builder.finish_node(); // GRID_TABLE
2002
2003    // Calculate lines consumed
2004    let table_start = caption_before
2005        .map(|(start, _)| start)
2006        .unwrap_or(actual_start);
2007    let table_end = if let Some((_, cap_end)) = caption_after {
2008        cap_end
2009    } else {
2010        end_pos
2011    };
2012
2013    Some(table_end - table_start)
2014}
2015
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Run the grid table parser over `input` from line `start` with default
    /// options and report how many lines it consumed.
    fn consumed_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let separators = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in separators {
            assert!(try_parse_grid_separator(line).is_some());
        }

        let non_separators = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in non_separators {
            assert!(try_parse_grid_separator(line).is_none());
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|c| c.is_header_separator));

        let plain_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(plain_cols.iter().all(|c| !c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for row in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(row));
        }

        assert!(!is_grid_content_row("+---+---+")); // separator, not content
        assert!(!is_grid_content_row("no pipes here"));
    }

    #[test]
    fn test_basic_grid_table() {
        let consumed = consumed_lines(
            &[
                "+-------+-------+",
                "| Col1  | Col2  |",
                "+=======+=======+",
                "| A     | B     |",
                "+-------+-------+",
                "",
            ],
            0,
        );

        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let consumed = consumed_lines(
            &[
                "+---------------+---------------+",
                "| Fruit         | Advantages    |",
                "+===============+===============+",
                "| Bananas       | - wrapper     |",
                "|               | - color       |",
                "+---------------+---------------+",
                "| Oranges       | - scurvy      |",
                "|               | - tasty       |",
                "+---------------+---------------+",
                "",
            ],
            0,
        );

        assert_eq!(consumed, Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let consumed = consumed_lines(
            &[
                "+-------+-------+",
                "| Fruit | Price |",
                "+=======+=======+",
                "| Apple | $1.00 |",
                "+-------+-------+",
                "| Pear  | $1.50 |",
                "+=======+=======+",
                "| Total | $2.50 |",
                "+=======+=======+",
                "",
            ],
            0,
        );

        assert_eq!(consumed, Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let consumed = consumed_lines(
            &[
                "+-------+-------+",
                "| A     | B     |",
                "+-------+-------+",
                "| C     | D     |",
                "+-------+-------+",
                "",
            ],
            0,
        );

        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        // Parsing starts at the top border (line 2); the caption is found by
        // looking backwards from there.
        let consumed = consumed_lines(
            &[
                ": Sample table",
                "",
                "+-------+-------+",
                "| A     | B     |",
                "+=======+=======+",
                "| C     | D     |",
                "+-------+-------+",
                "",
            ],
            2,
        );

        // Should include caption + blank + table
        assert_eq!(consumed, Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let consumed = consumed_lines(
            &[
                "+-------+-------+",
                "| A     | B     |",
                "+=======+=======+",
                "| C     | D     |",
                "+-------+-------+",
                "",
                "Table: My grid table",
                "",
            ],
            0,
        );

        // table + blank + caption
        assert_eq!(consumed, Some(7));
    }
}
2170
2171// ============================================================================
2172// Multiline Table Parsing
2173// ============================================================================
2174
2175/// Check if a line is a multiline table separator (continuous dashes).
2176/// Multiline table separators span the full width and are all dashes.
2177/// Returns Some(columns) if valid, None otherwise.
2178fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2179    let trimmed = line.trim_start();
2180    let leading_spaces = line.len() - trimmed.len();
2181
2182    // Must have leading spaces <= 3 to not be a code block
2183    if leading_spaces > 3 {
2184        return None;
2185    }
2186
2187    let trimmed = trimmed.trim_end();
2188
2189    // Must be all dashes (continuous line of dashes)
2190    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2191        return None;
2192    }
2193
2194    // Must have at least 3 dashes
2195    if trimmed.len() < 3 {
2196        return None;
2197    }
2198
2199    // This is a full-width separator - columns will be determined by column separator lines
2200    Some(vec![Column {
2201        start: leading_spaces,
2202        end: leading_spaces + trimmed.len(),
2203        alignment: Alignment::Default,
2204    }])
2205}
2206
2207/// Check if a line is a column separator line for multiline tables.
2208/// Column separators have dashes with spaces between them to define columns.
2209fn is_column_separator(line: &str) -> bool {
2210    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2211}
2212
2213fn is_headerless_single_row_without_blank(
2214    lines: &[&str],
2215    row_start: usize,
2216    row_end: usize,
2217    columns: &[Column],
2218) -> bool {
2219    if row_start >= row_end {
2220        return false;
2221    }
2222
2223    if row_end - row_start == 1 {
2224        return false;
2225    }
2226
2227    let Some(last_col) = columns.last() else {
2228        return false;
2229    };
2230
2231    for line in lines.iter().take(row_end).skip(row_start + 1) {
2232        let (content, _) = strip_newline(line);
2233        let prefix_end = last_col.start.min(content.len());
2234        if !content[..prefix_end].trim().is_empty() {
2235            return false;
2236        }
2237    }
2238
2239    true
2240}
2241
2242/// Try to parse a multiline table starting at the given position.
2243/// Returns the number of lines consumed if successful.
2244pub(crate) fn try_parse_multiline_table(
2245    lines: &[&str],
2246    start_pos: usize,
2247    builder: &mut GreenNodeBuilder<'static>,
2248    config: &ParserOptions,
2249) -> Option<usize> {
2250    if start_pos >= lines.len() {
2251        return None;
2252    }
2253
2254    let first_line = lines[start_pos];
2255
2256    // First line can be either:
2257    // 1. A full-width dash separator (for tables with headers)
2258    // 2. A column separator (for headerless tables)
2259    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2260    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2261    let headerless_columns = if is_column_sep_start {
2262        try_parse_table_separator(first_line)
2263    } else {
2264        None
2265    };
2266
2267    if !is_full_width_start && !is_column_sep_start {
2268        return None;
2269    }
2270
2271    // Look ahead to find the structure
2272    let mut pos = start_pos + 1;
2273    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2274    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2275    let mut has_header = false;
2276    let mut found_blank_line = false;
2277    let mut found_closing_sep = false;
2278    let mut content_line_count = 0usize;
2279
2280    // Scan for header section and column separator
2281    while pos < lines.len() {
2282        let line = lines[pos];
2283
2284        // Check for column separator (defines columns) - only if we started with full-width
2285        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2286            found_column_sep = true;
2287            column_sep_pos = pos;
2288            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2289            pos += 1;
2290            continue;
2291        }
2292
2293        // Check for blank line (row separator in body)
2294        if line.trim().is_empty() {
2295            found_blank_line = true;
2296            pos += 1;
2297            // Check if next line is a valid closing separator for this table shape.
2298            if pos < lines.len() {
2299                let next = lines[pos];
2300                let is_valid_closer = if is_full_width_start {
2301                    try_parse_multiline_separator(next).is_some()
2302                } else {
2303                    is_column_separator(next)
2304                };
2305                if is_valid_closer {
2306                    found_closing_sep = true;
2307                    pos += 1; // Include the closing separator
2308                    break;
2309                }
2310            }
2311            continue;
2312        }
2313
2314        // Check for closing full-width dashes (only for full-width-start tables).
2315        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2316            found_closing_sep = true;
2317            pos += 1;
2318            break;
2319        }
2320
2321        // Check for closing column separator (for headerless tables)
2322        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2323            found_closing_sep = true;
2324            pos += 1;
2325            break;
2326        }
2327
2328        // Content row
2329        content_line_count += 1;
2330        pos += 1;
2331    }
2332
2333    // Must have found a column separator to be a valid multiline table
2334    if !found_column_sep {
2335        return None;
2336    }
2337
2338    // Must have had at least one blank line between rows (distinguishes from simple tables)
2339    if !found_blank_line {
2340        if !is_column_sep_start {
2341            return None;
2342        }
2343        let columns = headerless_columns.as_deref()?;
2344        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2345            return None;
2346        }
2347    }
2348
2349    // Must have a closing separator
2350    if !found_closing_sep {
2351        return None;
2352    }
2353
2354    // Must have consumed more than just the opening separator
2355    if pos <= start_pos + 2 {
2356        return None;
2357    }
2358
2359    let end_pos = pos;
2360
2361    // Extract column boundaries from the separator line
2362    let columns =
2363        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2364
2365    // Check for caption before table
2366    let caption_before = find_caption_before_table(lines, start_pos);
2367
2368    // Check for caption after table
2369    let caption_after = if caption_before.is_some() {
2370        None
2371    } else {
2372        find_caption_after_table(lines, end_pos)
2373    };
2374
2375    // Build the multiline table
2376    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2377
2378    // Emit caption before if present
2379    if let Some((cap_start, cap_end)) = caption_before {
2380        emit_table_caption(builder, lines, cap_start, cap_end, config);
2381
2382        // Emit blank line between caption and table if present
2383        if cap_end < start_pos {
2384            for line in lines.iter().take(start_pos).skip(cap_end) {
2385                if line.trim().is_empty() {
2386                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2387                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2388                    builder.finish_node();
2389                }
2390            }
2391        }
2392    }
2393
2394    // Emit opening separator
2395    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2396    emit_line_tokens(builder, lines[start_pos]);
2397    builder.finish_node();
2398
2399    // Track state for emitting
2400    let mut in_header = has_header;
2401    let mut current_row_lines: Vec<&str> = Vec::new();
2402
2403    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2404        // Column separator (header/body divider)
2405        if i == column_sep_pos {
2406            // Emit any accumulated header lines
2407            if !current_row_lines.is_empty() {
2408                emit_multiline_table_row(
2409                    builder,
2410                    &current_row_lines,
2411                    &columns,
2412                    SyntaxKind::TABLE_HEADER,
2413                    config,
2414                );
2415                current_row_lines.clear();
2416            }
2417
2418            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2419            emit_line_tokens(builder, line);
2420            builder.finish_node();
2421            in_header = false;
2422            continue;
2423        }
2424
2425        // Closing separator (full-width or column separator at end)
2426        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2427            // Emit any accumulated row lines
2428            if !current_row_lines.is_empty() {
2429                let kind = if in_header {
2430                    SyntaxKind::TABLE_HEADER
2431                } else {
2432                    SyntaxKind::TABLE_ROW
2433                };
2434                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2435                current_row_lines.clear();
2436            }
2437
2438            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2439            emit_line_tokens(builder, line);
2440            builder.finish_node();
2441            continue;
2442        }
2443
2444        // Blank line (row separator)
2445        if line.trim().is_empty() {
2446            // Emit accumulated row
2447            if !current_row_lines.is_empty() {
2448                let kind = if in_header {
2449                    SyntaxKind::TABLE_HEADER
2450                } else {
2451                    SyntaxKind::TABLE_ROW
2452                };
2453                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2454                current_row_lines.clear();
2455            }
2456
2457            builder.start_node(SyntaxKind::BLANK_LINE.into());
2458            builder.token(SyntaxKind::BLANK_LINE.into(), line);
2459            builder.finish_node();
2460            continue;
2461        }
2462
2463        // Content line - accumulate for current row
2464        current_row_lines.push(line);
2465    }
2466
2467    // Emit any remaining accumulated lines
2468    if !current_row_lines.is_empty() {
2469        let kind = if in_header {
2470            SyntaxKind::TABLE_HEADER
2471        } else {
2472            SyntaxKind::TABLE_ROW
2473        };
2474        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2475    }
2476
2477    // Emit caption after if present
2478    if let Some((cap_start, cap_end)) = caption_after {
2479        if cap_start > end_pos {
2480            for line in lines.iter().take(cap_start).skip(end_pos) {
2481                if line.trim().is_empty() {
2482                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2483                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2484                    builder.finish_node();
2485                }
2486            }
2487        }
2488        emit_table_caption(builder, lines, cap_start, cap_end, config);
2489    }
2490
2491    builder.finish_node(); // MultilineTable
2492
2493    // Calculate lines consumed
2494    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2495    let table_end = if let Some((_, cap_end)) = caption_after {
2496        cap_end
2497    } else {
2498        end_pos
2499    };
2500
2501    Some(table_end - table_start)
2502}
2503
2504/// Extract cell contents from first line only (for CST emission).
2505/// Multi-line content will be in continuation TEXT tokens.
2506fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2507    let (line_content, _) = strip_newline(line);
2508    let mut cells = Vec::new();
2509
2510    for column in columns.iter() {
2511        let column_start = column_offset_to_byte_index(line_content, column.start);
2512        let column_end = column_offset_to_byte_index(line_content, column.end);
2513
2514        // Extract FULL text for this column (including whitespace)
2515        let cell_text = if column_start < column_end {
2516            &line_content[column_start..column_end]
2517        } else if column_start < line_content.len() {
2518            &line_content[column_start..]
2519        } else {
2520            ""
2521        };
2522
2523        cells.push(cell_text.to_string());
2524    }
2525
2526    cells
2527}
2528
2529/// Emit a multiline table row with inline parsing (Phase 7.1).
2530fn emit_multiline_table_row(
2531    builder: &mut GreenNodeBuilder<'static>,
2532    lines: &[&str],
2533    columns: &[Column],
2534    kind: SyntaxKind,
2535    config: &ParserOptions,
2536) {
2537    if lines.is_empty() {
2538        return;
2539    }
2540
2541    // Extract cell contents from first line only (for CST losslessness)
2542    let first_line = lines[0];
2543    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2544
2545    builder.start_node(kind.into());
2546
2547    // Emit first line with TABLE_CELL nodes
2548    let (trimmed, newline_str) = strip_newline(first_line);
2549    let mut current_pos = 0;
2550
2551    for (col_idx, column) in columns.iter().enumerate() {
2552        let cell_text = &cell_contents[col_idx];
2553        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2554        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2555
2556        // Emit whitespace before cell
2557        if current_pos < cell_start {
2558            builder.token(
2559                SyntaxKind::WHITESPACE.into(),
2560                &trimmed[current_pos..cell_start],
2561            );
2562        }
2563
2564        // Emit cell with inline parsing (first line content only)
2565        emit_table_cell(builder, cell_text, config);
2566
2567        current_pos = cell_end;
2568    }
2569
2570    // Emit trailing whitespace
2571    if current_pos < trimmed.len() {
2572        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2573    }
2574
2575    // Emit newline
2576    if !newline_str.is_empty() {
2577        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2578    }
2579
2580    // Emit continuation lines as TEXT to preserve exact line structure
2581    for line in lines.iter().skip(1) {
2582        emit_line_tokens(builder, line);
2583    }
2584
2585    builder.finish_node();
2586}
2587
#[cfg(test)]
mod multiline_table_tests {
    //! Tests for multiline table separator detection, multiline table parsing,
    //! and the inline content emitted by `emit_table_cell`.

    use super::*;
    use crate::syntax::SyntaxNode;

    /// Feeds `lines` to `try_parse_multiline_table` starting at index 0 with
    /// default parser options and a throwaway builder, returning its result.
    fn parse_from_start(lines: &[&str]) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_multiline_table(lines, 0, &mut builder, &ParserOptions::default())
    }

    /// Emits a single table cell for `text` with default parser options and
    /// wraps the finished green tree in a `SyntaxNode` root.
    fn cell_node(text: &str) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        emit_table_cell(&mut builder, text, &ParserOptions::default());
        SyntaxNode::new_root(builder.finish())
    }

    /// Kinds of every direct child of `cell`, tokens included, in order.
    fn element_kinds(cell: &SyntaxNode) -> Vec<SyntaxKind> {
        cell.children_with_tokens()
            .map(|element| element.kind())
            .collect()
    }

    /// Kinds of the direct child *nodes* of `cell` (tokens skipped), in order.
    fn node_kinds(cell: &SyntaxNode) -> Vec<SyntaxKind> {
        cell.children().map(|child| child.kind()).collect()
    }

    #[test]
    fn test_multiline_separator_detection() {
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----", // with leading spaces
        ];
        for &line in &accepted {
            assert!(
                try_parse_multiline_separator(line).is_some(),
                "expected a multiline separator: {:?}",
                line
            );
        }

        let rejected = [
            "--",      // too short
            "--- ---", // has spaces
            "+---+",   // grid separator
        ];
        for &line in &rejected {
            assert!(
                try_parse_multiline_separator(line).is_none(),
                "expected no multiline separator: {:?}",
                line
            );
        }
    }

    #[test]
    fn test_basic_multiline_table() {
        let input = [
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ];

        assert_eq!(parse_from_start(&input), Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        let input = [
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ];

        assert_eq!(parse_from_start(&input), Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ];

        assert_eq!(parse_from_start(&input), None);
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        let input = [
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ];

        assert_eq!(parse_from_start(&input), Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let input = [
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ];

        // 6 table lines, then the blank line, then the caption line.
        assert_eq!(parse_from_start(&input), Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let input = [
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ];

        assert_eq!(parse_from_start(&input), Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        let input = [
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ];

        assert_eq!(parse_from_start(&input), None);
    }

    #[test]
    fn test_not_multiline_table() {
        // A simple table (header text above the dashes) must be left to the
        // simple-table parser.
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ];

        // The first line is not a full-width separator, so nothing is parsed.
        assert_eq!(parse_from_start(&input), None);
    }

    // Phase 7.1: Unit tests for emit_table_cell() helper
    #[test]
    fn test_emit_table_cell_plain_text() {
        let node = cell_node("Cell");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "Cell");

        // Exactly one child element, and it is a TEXT token.
        assert_eq!(element_kinds(&node), [SyntaxKind::TEXT]);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        let node = cell_node("*italic*");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "*italic*");

        // Exactly one child node: EMPHASIS.
        assert_eq!(node_kinds(&node), [SyntaxKind::EMPHASIS]);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        let node = cell_node("`code`");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "`code`");

        // Exactly one child node: INLINE_CODE.
        assert_eq!(node_kinds(&node), [SyntaxKind::INLINE_CODE]);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        let node = cell_node("[text](url)");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "[text](url)");

        // Exactly one child node: LINK.
        assert_eq!(node_kinds(&node), [SyntaxKind::LINK]);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        let node = cell_node("**bold**");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "**bold**");

        // Exactly one child node: STRONG.
        assert_eq!(node_kinds(&node), [SyntaxKind::STRONG]);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let node = cell_node("Text **bold** and `code`");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "Text **bold** and `code`");

        // Expected shape: TEXT, STRONG, TEXT, INLINE_CODE. Only the element
        // count lower bound and the first two kinds are pinned here.
        let kinds = element_kinds(&node);
        assert!(kinds.len() >= 4);
        assert_eq!(kinds[0], SyntaxKind::TEXT);
        assert_eq!(kinds[1], SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let node = cell_node("");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "");

        // An empty cell carries no child nodes or tokens.
        assert_eq!(node.children_with_tokens().count(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let node = cell_node(r"A \| B");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        // The backslash-escaped pipe must survive in the cell text verbatim.
        assert_eq!(node.text(), r"A \| B");
    }
}