Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
9use crate::parser::utils::inline_emission;
10
/// Horizontal alignment assigned to a table column.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Alignment {
    /// Column content is left-aligned.
    Left,
    /// Column content is right-aligned.
    Right,
    /// Column content is centered.
    Center,
    /// No explicit alignment was determined for the column.
    Default,
}
18
19/// Column information extracted from the separator line.
20#[derive(Debug, Clone)]
21pub(crate) struct Column {
22    /// Start position (byte index) in the line
23    start: usize,
24    /// End position (byte index) in the line
25    end: usize,
26    /// Column alignment
27    alignment: Alignment,
28}
29
30/// Try to detect if a line is a table separator line.
31/// Returns Some(column positions) if it's a valid separator.
32pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
33    let trimmed = line.trim_start();
34    // Strip trailing newline if present (CRLF or LF)
35    let (trimmed, newline_str) = strip_newline(trimmed);
36    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
37
38    // Must have leading spaces <= 3 to not be a code block
39    if leading_spaces > 3 {
40        return None;
41    }
42
43    // Simple tables only use dashed separators.
44    if trimmed.contains('*') || trimmed.contains('_') {
45        return None;
46    }
47
48    // Must contain at least one dash
49    if !trimmed.contains('-') {
50        return None;
51    }
52
53    // A separator line consists of dashes and spaces
54    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
55        return None;
56    }
57
58    // Must not be a horizontal rule.
59    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
60    if dash_groups.len() <= 1 {
61        return None;
62    }
63
64    // Extract column positions from dash groups
65    let columns = extract_columns(trimmed, leading_spaces);
66
67    if columns.is_empty() {
68        return None;
69    }
70
71    Some(columns)
72}
73
74/// Extract column positions from a separator line.
75fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
76    let mut columns = Vec::new();
77    let mut in_dashes = false;
78    let mut col_start = 0;
79
80    for (i, ch) in separator.char_indices() {
81        match ch {
82            '-' => {
83                if !in_dashes {
84                    col_start = i + offset;
85                    in_dashes = true;
86                }
87            }
88            ' ' => {
89                if in_dashes {
90                    columns.push(Column {
91                        start: col_start,
92                        end: i + offset,
93                        alignment: Alignment::Default, // Will be determined later
94                    });
95                    in_dashes = false;
96                }
97            }
98            _ => {}
99        }
100    }
101
102    // Handle last column
103    if in_dashes {
104        columns.push(Column {
105            start: col_start,
106            end: separator.len() + offset,
107            alignment: Alignment::Default,
108        });
109    }
110
111    columns
112}
113
/// Split a table-caption marker off the front of `line`.
///
/// Recognised markers are `Table:`, `table:`, and a bare `:` that is followed
/// by whitespace. On success returns `(prefix_len, rest)`, where `prefix_len`
/// is the byte length of the indentation plus the marker and `rest` is the
/// text after the marker. Lines indented by more than three bytes of
/// whitespace are rejected.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation means a code block, not a caption.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows, so fenced-div fences
    // such as ":::" are not mistaken for captions.
    body.strip_prefix(':')
        .filter(|rest| rest.starts_with(char::is_whitespace))
        .map(|rest| (indent + 1, rest))
}
142
143/// Check if a line could be the start of a table caption.
144fn is_table_caption_start(line: &str) -> bool {
145    try_parse_caption_prefix(line).is_some()
146}
147
/// Report whether `line`, after leading whitespace, starts with exactly one
/// colon (a `:` that is not immediately followed by another `:`).
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
152
153fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
154    let Some((_, rest)) = try_parse_caption_prefix(line) else {
155        return false;
156    };
157    let trimmed = rest.trim_start();
158    trimmed.starts_with("```") || trimmed.starts_with("~~~")
159}
160
161fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
162    if !is_table_caption_start(lines[pos]) {
163        return false;
164    }
165
166    if is_bare_colon_caption_start(lines[pos])
167        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
168    {
169        return false;
170    }
171
172    // Avoid stealing definition-list definitions (":   ...") as table captions.
173    if is_bare_colon_caption_start(lines[pos]) && pos > 0 && !lines[pos - 1].trim().is_empty() {
174        return false;
175    }
176    true
177}
178
179/// Check if a line could be the start of a grid table.
180/// Grid tables start with a separator line like +---+---+ or +===+===+
181fn is_grid_table_start(line: &str) -> bool {
182    try_parse_grid_separator(line).is_some()
183}
184
185/// Check if a line could be the start of a multiline table.
186/// Multiline tables start with either:
187/// - A full-width dash separator (----)
188/// - A column separator with dashes and spaces (---- ---- ----)
189fn is_multiline_table_start(line: &str) -> bool {
190    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
191}
192
193/// Check if there's a table following a potential caption at this position.
194/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
195pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
196    if caption_pos >= lines.len() {
197        return false;
198    }
199
200    // Caption must start with a caption prefix
201    if !is_valid_caption_start_before_table(lines, caption_pos) {
202        return false;
203    }
204
205    let mut pos = caption_pos + 1;
206
207    // Skip continuation lines of caption (non-blank lines)
208    while pos < lines.len() && !lines[pos].trim().is_empty() {
209        // If we hit a table separator, we found a table
210        if try_parse_table_separator(lines[pos]).is_some() {
211            return true;
212        }
213        pos += 1;
214    }
215
216    // Skip one blank line
217    if pos < lines.len() && lines[pos].trim().is_empty() {
218        pos += 1;
219    }
220
221    // Check for table at next position
222    if pos < lines.len() {
223        let line = lines[pos];
224
225        // Check for grid table start (+---+---+ or +===+===+)
226        if is_grid_table_start(line) {
227            return true;
228        }
229
230        // Check for multiline table start (---- or ---- ---- ----)
231        if is_multiline_table_start(line) {
232            return true;
233        }
234
235        // Could be a separator line (simple/pipe table, headerless)
236        if try_parse_table_separator(line).is_some() {
237            return true;
238        }
239
240        // Or could be a header line followed by separator (simple/pipe table with header)
241        if pos + 1 < lines.len() && !line.trim().is_empty() {
242            let next_line = lines[pos + 1];
243            if try_parse_table_separator(next_line).is_some()
244                || try_parse_pipe_separator(next_line).is_some()
245            {
246                return true;
247            }
248        }
249    }
250
251    false
252}
253
254/// Find caption before table (if any).
255/// Returns (caption_start, caption_end) positions, or None.
256fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
257    if table_start == 0 {
258        return None;
259    }
260
261    // Look backward for a caption
262    // Caption must be immediately before table (with possible blank line between)
263    let mut pos = table_start - 1;
264
265    // Skip one blank line if present
266    if lines[pos].trim().is_empty() {
267        if pos == 0 {
268            return None;
269        }
270        pos -= 1;
271    }
272
273    // Now pos points to the last non-blank line before the table
274    // This could be the last line of a multiline caption, or a single-line caption
275    let caption_end = pos + 1; // End is exclusive
276
277    // If this line is NOT a caption start, it might be a continuation line
278    // Scan backward through non-blank lines to find the caption start
279    if !is_valid_caption_start_before_table(lines, pos) {
280        // Not a caption start - check if there's a caption start above
281        let mut scan_pos = pos;
282        while scan_pos > 0 {
283            scan_pos -= 1;
284            let line = lines[scan_pos];
285
286            // If we hit a blank line, we've gone too far
287            if line.trim().is_empty() {
288                return None;
289            }
290
291            // If we find a caption start, this is the beginning of the multiline caption
292            if is_valid_caption_start_before_table(lines, scan_pos) {
293                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
294                    return None;
295                }
296                if previous_nonblank_looks_like_table(lines, scan_pos) {
297                    return None;
298                }
299                return Some((scan_pos, caption_end));
300            }
301        }
302        // Scanned to beginning without finding caption start
303        None
304    } else {
305        if pos > 0 && !lines[pos - 1].trim().is_empty() {
306            return None;
307        }
308        if previous_nonblank_looks_like_table(lines, pos) {
309            return None;
310        }
311        // This line is a caption start - return the range
312        Some((pos, caption_end))
313    }
314}
315
316fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
317    if pos == 0 {
318        return false;
319    }
320    let mut i = pos;
321    while i > 0 {
322        i -= 1;
323        let line = lines[i].trim();
324        if line.is_empty() {
325            continue;
326        }
327        return line_looks_like_table_syntax(line);
328    }
329    false
330}
331
332fn line_looks_like_table_syntax(line: &str) -> bool {
333    if line.starts_with('|') && line.matches('|').count() >= 2 {
334        return true;
335    }
336    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
337        return true;
338    }
339    try_parse_table_separator(line).is_some()
340        || try_parse_pipe_separator(line).is_some()
341        || try_parse_grid_separator(line).is_some()
342}
343
344/// Find caption after table (if any).
345/// Returns (caption_start, caption_end) positions, or None.
346fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
347    if table_end >= lines.len() {
348        return None;
349    }
350
351    let mut pos = table_end;
352
353    // Skip one blank line if present
354    if pos < lines.len() && lines[pos].trim().is_empty() {
355        pos += 1;
356    }
357
358    if pos >= lines.len() {
359        return None;
360    }
361
362    // Check if this line is a caption
363    if is_table_caption_start(lines[pos]) {
364        let caption_start = pos;
365        // Find end of caption (continues until blank line)
366        let mut caption_end = caption_start + 1;
367        while caption_end < lines.len() && !lines[caption_end].trim().is_empty() {
368            caption_end += 1;
369        }
370        Some((caption_start, caption_end))
371    } else {
372        None
373    }
374}
375
376/// Emit a table caption node.
377fn emit_table_caption(
378    builder: &mut GreenNodeBuilder<'static>,
379    lines: &[&str],
380    start: usize,
381    end: usize,
382    config: &ParserOptions,
383) {
384    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
385
386    for (i, line) in lines[start..end].iter().enumerate() {
387        if i == 0 {
388            // First line - parse and emit prefix separately
389            let trimmed = line.trim_start();
390            let leading_ws_len = line.len() - trimmed.len();
391
392            // Emit leading whitespace if present
393            if leading_ws_len > 0 {
394                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
395            }
396
397            // Check for caption prefix and emit separately
398            // Calculate where the prefix ends (after trimmed content)
399            let prefix_and_rest = if line.ends_with('\n') {
400                &line[leading_ws_len..line.len() - 1] // Exclude newline
401            } else {
402                &line[leading_ws_len..]
403            };
404
405            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
406                (7, "Table: ")
407            } else if prefix_and_rest.starts_with("table: ") {
408                (7, "table: ")
409            } else if prefix_and_rest.starts_with(": ") {
410                (2, ": ")
411            } else if prefix_and_rest.starts_with(':') {
412                (1, ":")
413            } else {
414                (0, "")
415            };
416
417            if prefix_len > 0 {
418                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
419
420                // Emit rest of line after prefix
421                let rest_start = leading_ws_len + prefix_len;
422                if rest_start < line.len() {
423                    // Get the caption text (excluding newline)
424                    let (caption_text, newline_str) = strip_newline(&line[rest_start..]);
425
426                    if !caption_text.is_empty() {
427                        inline_emission::emit_inlines(builder, caption_text, config);
428                    }
429
430                    if !newline_str.is_empty() {
431                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
432                    }
433                }
434            } else {
435                // No recognized prefix, emit whole trimmed line
436                let (text, newline_str) = strip_newline(&line[leading_ws_len..]);
437
438                if !text.is_empty() {
439                    inline_emission::emit_inlines(builder, text, config);
440                }
441
442                if !newline_str.is_empty() {
443                    builder.token(SyntaxKind::NEWLINE.into(), newline_str);
444                }
445            }
446        } else {
447            // Continuation lines - emit with inline parsing
448            let (text, newline_str) = strip_newline(line);
449
450            if !text.is_empty() {
451                inline_emission::emit_inlines(builder, text, config);
452            }
453
454            if !newline_str.is_empty() {
455                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
456            }
457        }
458    }
459
460    builder.finish_node(); // TABLE_CAPTION
461}
462
463/// Emit a table cell with inline content parsing.
464/// This is the core helper for Phase 7.1 table inline parsing migration.
465fn emit_table_cell(
466    builder: &mut GreenNodeBuilder<'static>,
467    cell_text: &str,
468    config: &ParserOptions,
469) {
470    builder.start_node(SyntaxKind::TABLE_CELL.into());
471
472    // Parse inline content within the cell
473    if !cell_text.is_empty() {
474        inline_emission::emit_inlines(builder, cell_text, config);
475    }
476
477    builder.finish_node(); // TABLE_CELL
478}
479
480/// Determine column alignments based on separator and optional header.
481fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
482    for col in columns.iter_mut() {
483        let sep_slice = &separator_line[col.start..col.end];
484
485        if let Some(header) = header_line {
486            // Extract header text for this column
487            let header_text = if col.end <= header.len() {
488                header[col.start..col.end].trim()
489            } else if col.start < header.len() {
490                header[col.start..].trim()
491            } else {
492                ""
493            };
494
495            if header_text.is_empty() {
496                col.alignment = Alignment::Default;
497                continue;
498            }
499
500            // Find where the header text starts and ends within the column
501            let header_in_col = &header[col.start..col.end.min(header.len())];
502            let text_start = header_in_col.len() - header_in_col.trim_start().len();
503            let text_end = header_in_col.trim_end().len() + text_start;
504
505            // Check dash alignment relative to text
506            let dashes_start = 0; // Dashes start at beginning of sep_slice
507            let dashes_end = sep_slice.len();
508
509            let flush_left = dashes_start == text_start;
510            let flush_right = dashes_end == text_end;
511
512            col.alignment = match (flush_left, flush_right) {
513                (true, true) => Alignment::Default,
514                (true, false) => Alignment::Left,
515                (false, true) => Alignment::Right,
516                (false, false) => Alignment::Center,
517            };
518        } else {
519            // Without header, alignment based on first row (we'll handle this later)
520            col.alignment = Alignment::Default;
521        }
522    }
523}
524
525/// Try to parse a simple table starting at the given position.
526/// Returns the number of lines consumed if successful.
527pub(crate) fn try_parse_simple_table(
528    lines: &[&str],
529    start_pos: usize,
530    builder: &mut GreenNodeBuilder<'static>,
531    config: &ParserOptions,
532) -> Option<usize> {
533    log::debug!("try_parse_simple_table at line {}", start_pos + 1);
534
535    if start_pos >= lines.len() {
536        return None;
537    }
538
539    // Look for a separator line
540    let separator_pos = find_separator_line(lines, start_pos)?;
541    log::debug!("  found separator at line {}", separator_pos + 1);
542
543    let separator_line = lines[separator_pos];
544    let mut columns = try_parse_table_separator(separator_line)?;
545
546    // Determine if there's a header (separator not at start)
547    let has_header = separator_pos > start_pos;
548    let header_line = if has_header {
549        Some(lines[separator_pos - 1])
550    } else {
551        None
552    };
553
554    // Determine alignments
555    determine_alignments(&mut columns, separator_line, header_line);
556
557    // Find table end (blank line or end of input)
558    let end_pos = find_table_end(lines, separator_pos + 1);
559
560    // Must have at least one data row (or it's just a separator)
561    let data_rows = end_pos - separator_pos - 1;
562
563    if data_rows == 0 {
564        return None;
565    }
566
567    // Check for caption before table
568    let caption_before = find_caption_before_table(lines, start_pos);
569
570    // Check for caption after table
571    let caption_after = find_caption_after_table(lines, end_pos);
572
573    // Build the table
574    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
575
576    // Emit caption before if present
577    if let Some((cap_start, cap_end)) = caption_before {
578        emit_table_caption(builder, lines, cap_start, cap_end, config);
579    }
580
581    // Emit header if present
582    if has_header {
583        emit_table_row(
584            builder,
585            lines[separator_pos - 1],
586            &columns,
587            SyntaxKind::TABLE_HEADER,
588            config,
589        );
590    }
591
592    // Emit separator
593    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
594    emit_line_tokens(builder, separator_line);
595    builder.finish_node();
596
597    // Emit data rows
598    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
599        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
600    }
601
602    // Emit caption after if present
603    if let Some((cap_start, cap_end)) = caption_after {
604        // Emit blank line before caption if needed
605        if cap_start > end_pos {
606            builder.start_node(SyntaxKind::BLANK_LINE.into());
607            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
608            builder.finish_node();
609        }
610        emit_table_caption(builder, lines, cap_start, cap_end, config);
611    }
612
613    builder.finish_node(); // SimpleTable
614
615    // Calculate lines consumed (including captions)
616    let table_start = if let Some((cap_start, _)) = caption_before {
617        cap_start
618    } else if has_header {
619        separator_pos - 1
620    } else {
621        separator_pos
622    };
623
624    let table_end = if let Some((_, cap_end)) = caption_after {
625        cap_end
626    } else {
627        end_pos
628    };
629
630    let lines_consumed = table_end - table_start;
631
632    Some(lines_consumed)
633}
634
635/// Find the position of a separator line starting from pos.
636fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
637    log::debug!("  find_separator_line from line {}", start_pos + 1);
638
639    // Check first line
640    log::debug!("    checking first line: {:?}", lines[start_pos]);
641    if try_parse_table_separator(lines[start_pos]).is_some() {
642        log::debug!("    separator found at first line");
643        return Some(start_pos);
644    }
645
646    // Check second line (for table with header)
647    if start_pos + 1 < lines.len()
648        && !lines[start_pos].trim().is_empty()
649        && try_parse_table_separator(lines[start_pos + 1]).is_some()
650    {
651        return Some(start_pos + 1);
652    }
653
654    None
655}
656
657/// Find where the table ends (first blank line or end of input).
658fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
659    for i in start_pos..lines.len() {
660        if lines[i].trim().is_empty() {
661            return i;
662        }
663        // Check if this could be a closing separator
664        if try_parse_table_separator(lines[i]).is_some() {
665            // Check if next line is blank or end
666            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
667                return i + 1;
668            }
669        }
670    }
671    lines.len()
672}
673
674/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
675/// Uses column boundaries from the separator line to extract cells.
676fn emit_table_row(
677    builder: &mut GreenNodeBuilder<'static>,
678    line: &str,
679    columns: &[Column],
680    row_kind: SyntaxKind,
681    config: &ParserOptions,
682) {
683    builder.start_node(row_kind.into());
684
685    let (line_without_newline, newline_str) = strip_newline(line);
686
687    // Emit leading whitespace if present
688    let trimmed = line_without_newline.trim_start();
689    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
690    if leading_ws_len > 0 {
691        builder.token(
692            SyntaxKind::WHITESPACE.into(),
693            &line_without_newline[..leading_ws_len],
694        );
695    }
696
697    // Track where we are in the line (for losslessness)
698    let mut current_pos = 0;
699
700    // Extract and emit cells based on column boundaries
701    for col in columns.iter() {
702        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
703        let cell_start = if col.start >= leading_ws_len {
704            (col.start - leading_ws_len).min(trimmed.len())
705        } else {
706            0
707        };
708
709        let cell_end = if col.end >= leading_ws_len {
710            (col.end - leading_ws_len).min(trimmed.len())
711        } else {
712            0
713        };
714
715        // Extract cell text from column bounds
716        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
717            &trimmed[cell_start..cell_end]
718        } else if cell_start < trimmed.len() {
719            &trimmed[cell_start..]
720        } else {
721            ""
722        };
723
724        let cell_content = cell_text.trim();
725        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
726
727        // Emit any whitespace from current position to start of cell content
728        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
729        if current_pos < content_abs_pos {
730            builder.token(
731                SyntaxKind::WHITESPACE.into(),
732                &trimmed[current_pos..content_abs_pos],
733            );
734        }
735
736        // Emit cell with inline parsing
737        emit_table_cell(builder, cell_content, config);
738
739        // Update current position to end of cell content
740        current_pos = content_abs_pos + cell_content.len();
741    }
742
743    // Emit any remaining whitespace after last cell
744    if current_pos < trimmed.len() {
745        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
746    }
747
748    // Emit newline if present
749    if !newline_str.is_empty() {
750        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
751    }
752
753    builder.finish_node();
754}
755
756// ============================================================================
757// Pipe Table Parsing
758// ============================================================================
759
760/// Check if a line is a pipe table separator line.
761/// Returns the column alignments if it's a valid separator.
762fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
763    let trimmed = line.trim();
764
765    // Must contain at least one pipe
766    if !trimmed.contains('|') && !trimmed.contains('+') {
767        return None;
768    }
769
770    // Split by pipes (or + for orgtbl variant)
771    let cells: Vec<&str> = if trimmed.contains('+') {
772        // Orgtbl variant: use + as separator in separator line
773        trimmed.split(['|', '+']).collect()
774    } else {
775        trimmed.split('|').collect()
776    };
777
778    let mut alignments = Vec::new();
779
780    for cell in cells {
781        let cell = cell.trim();
782
783        // Skip empty cells (from leading/trailing pipes)
784        if cell.is_empty() {
785            continue;
786        }
787
788        // Must be dashes with optional colons
789        let starts_colon = cell.starts_with(':');
790        let ends_colon = cell.ends_with(':');
791
792        // Remove colons to check if rest is all dashes
793        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
794
795        // Must have at least one dash
796        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
797            return None;
798        }
799
800        // Determine alignment from colon positions
801        let alignment = match (starts_colon, ends_colon) {
802            (true, true) => Alignment::Center,
803            (true, false) => Alignment::Left,
804            (false, true) => Alignment::Right,
805            (false, false) => Alignment::Default,
806        };
807
808        alignments.push(alignment);
809    }
810
811    // Must have at least one column
812    if alignments.is_empty() {
813        None
814    } else {
815        Some(alignments)
816    }
817}
818
/// Split a pipe table row into cells.
///
/// The line is trimmed, one leading pipe is dropped, and the rest is split on
/// unescaped pipes. Each cell is trimmed. A trailing empty cell (the text
/// after a closing pipe) is dropped; empty cells in the middle are kept.
///
/// A backslash escapes the character that follows it, whatever that is, and
/// both characters are kept in the cell text. So `\|` never splits a cell,
/// while in `\\|` the pipe is a real delimiter. This matches how
/// `emit_pipe_table_row` locates cell boundaries.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // A leading pipe only frames the row; it does not open an empty cell.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and consume the escaped character with
                // it so an escaped pipe is never seen as a delimiter.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // The text after the last pipe is a cell only when it is non-empty.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
866
867/// Emit a pipe table row with inline-parsed cells.
868/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
869fn emit_pipe_table_row(
870    builder: &mut GreenNodeBuilder<'static>,
871    line: &str,
872    row_kind: SyntaxKind,
873    config: &ParserOptions,
874) {
875    builder.start_node(row_kind.into());
876
877    let (line_without_newline, newline_str) = strip_newline(line);
878    let trimmed = line_without_newline.trim();
879
880    // Parse cell boundaries
881    let mut cell_starts = Vec::new();
882    let mut cell_ends = Vec::new();
883    let mut in_escape = false;
884
885    // Find all pipe positions (excluding escaped ones)
886    let mut pipe_positions = Vec::new();
887    for (i, ch) in trimmed.char_indices() {
888        if in_escape {
889            in_escape = false;
890            continue;
891        }
892        if ch == '\\' {
893            in_escape = true;
894            continue;
895        }
896        if ch == '|' {
897            pipe_positions.push(i);
898        }
899    }
900
901    // Determine cell boundaries based on pipe positions
902    if pipe_positions.is_empty() {
903        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
904        cell_starts.push(0);
905        cell_ends.push(trimmed.len());
906    } else {
907        // Check if line starts with pipe
908        let start_pipe = pipe_positions.first() == Some(&0);
909        // Check if line ends with pipe
910        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
911
912        if start_pipe {
913            // Skip first pipe
914            for i in 1..pipe_positions.len() {
915                cell_starts.push(pipe_positions[i - 1] + 1);
916                cell_ends.push(pipe_positions[i]);
917            }
918            // Add last cell if there's no trailing pipe
919            if !end_pipe {
920                cell_starts.push(*pipe_positions.last().unwrap() + 1);
921                cell_ends.push(trimmed.len());
922            }
923        } else {
924            // No leading pipe
925            cell_starts.push(0);
926            cell_ends.push(pipe_positions[0]);
927
928            for i in 1..pipe_positions.len() {
929                cell_starts.push(pipe_positions[i - 1] + 1);
930                cell_ends.push(pipe_positions[i]);
931            }
932
933            // Add last cell if there's no trailing pipe
934            if !end_pipe {
935                cell_starts.push(*pipe_positions.last().unwrap() + 1);
936                cell_ends.push(trimmed.len());
937            }
938        }
939    }
940
941    // Emit leading whitespace if present (before trim)
942    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
943    if leading_ws_len > 0 {
944        builder.token(
945            SyntaxKind::WHITESPACE.into(),
946            &line_without_newline[..leading_ws_len],
947        );
948    }
949
950    // Emit cells with pipes
951    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
952        // Emit pipe before cell (except for first cell if no leading pipe)
953        if *start > 0 {
954            builder.token(SyntaxKind::TEXT.into(), "|");
955        } else if idx == 0 && trimmed.starts_with('|') {
956            // Leading pipe
957            builder.token(SyntaxKind::TEXT.into(), "|");
958        }
959
960        // Get cell content with its whitespace
961        let cell_with_ws = &trimmed[*start..*end];
962        let cell_content = cell_with_ws.trim();
963
964        // Emit leading whitespace within cell
965        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
966        if !cell_leading_ws.is_empty() {
967            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
968        }
969
970        // Emit cell with inline parsing
971        emit_table_cell(builder, cell_content, config);
972
973        // Emit trailing whitespace within cell
974        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
975        if cell_trailing_ws_start < cell_with_ws.len() {
976            builder.token(
977                SyntaxKind::WHITESPACE.into(),
978                &cell_with_ws[cell_trailing_ws_start..],
979            );
980        }
981    }
982
983    // Emit trailing pipe if present
984    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
985        builder.token(SyntaxKind::TEXT.into(), "|");
986    }
987
988    // Emit trailing whitespace after trim (before newline)
989    let trailing_ws_start = leading_ws_len + trimmed.len();
990    if trailing_ws_start < line_without_newline.len() {
991        builder.token(
992            SyntaxKind::WHITESPACE.into(),
993            &line_without_newline[trailing_ws_start..],
994        );
995    }
996
997    // Emit newline
998    if !newline_str.is_empty() {
999        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1000    }
1001
1002    builder.finish_node();
1003}
1004
1005/// Try to parse a pipe table starting at the given position.
1006/// Returns the number of lines consumed if successful.
1007pub(crate) fn try_parse_pipe_table(
1008    lines: &[&str],
1009    start_pos: usize,
1010    builder: &mut GreenNodeBuilder<'static>,
1011    config: &ParserOptions,
1012) -> Option<usize> {
1013    if start_pos + 1 >= lines.len() {
1014        return None;
1015    }
1016
1017    // Check if this line is a caption followed by a table
1018    // If so, the actual table starts after the caption and blank line
1019    let (actual_start, has_caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1020        // Skip caption line
1021        let mut pos = start_pos + 1;
1022        // Skip blank line if present
1023        while pos < lines.len() && lines[pos].trim().is_empty() {
1024            pos += 1;
1025        }
1026        (pos, true)
1027    } else {
1028        (start_pos, false)
1029    };
1030
1031    if actual_start + 1 >= lines.len() {
1032        return None;
1033    }
1034
1035    // First line should have pipes (potential header)
1036    let header_line = lines[actual_start];
1037    if !header_line.contains('|') {
1038        return None;
1039    }
1040
1041    // Second line should be separator
1042    let separator_line = lines[actual_start + 1];
1043    let alignments = try_parse_pipe_separator(separator_line)?;
1044
1045    // Parse header cells
1046    let header_cells = parse_pipe_table_row(header_line);
1047
1048    // Number of columns should match (approximately - be lenient)
1049    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1050        // Only fail if very different
1051        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1052            return None;
1053        }
1054    }
1055
1056    // Find table end (first blank line or end of input)
1057    let mut end_pos = actual_start + 2;
1058    while end_pos < lines.len() {
1059        let line = lines[end_pos];
1060        if line.trim().is_empty() {
1061            break;
1062        }
1063        // Row should have pipes
1064        if !line.contains('|') {
1065            break;
1066        }
1067        end_pos += 1;
1068    }
1069
1070    // Must have at least one data row
1071    if end_pos <= actual_start + 2 {
1072        return None;
1073    }
1074
1075    // Check for caption before table (only if we didn't already detect it)
1076    let caption_before = if has_caption_before {
1077        Some((start_pos, start_pos + 1)) // Single-line caption detected earlier
1078    } else {
1079        find_caption_before_table(lines, actual_start)
1080    };
1081
1082    // Check for caption after table
1083    let caption_after = find_caption_after_table(lines, end_pos);
1084
1085    // Build the pipe table
1086    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1087
1088    // Emit caption before if present
1089    if let Some((cap_start, cap_end)) = caption_before {
1090        emit_table_caption(builder, lines, cap_start, cap_end, config);
1091        // Emit blank line between caption and table if present
1092        if cap_end < actual_start {
1093            for line in lines.iter().take(actual_start).skip(cap_end) {
1094                if line.trim().is_empty() {
1095                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1096                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1097                    builder.finish_node();
1098                }
1099            }
1100        }
1101    }
1102
1103    // Emit header row with inline-parsed cells
1104    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1105
1106    // Emit separator
1107    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1108    emit_line_tokens(builder, separator_line);
1109    builder.finish_node();
1110
1111    // Emit data rows with inline-parsed cells
1112    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1113        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1114    }
1115
1116    // Emit caption after if present
1117    if let Some((cap_start, cap_end)) = caption_after {
1118        // Emit blank line before caption if needed
1119        if cap_start > end_pos {
1120            builder.start_node(SyntaxKind::BLANK_LINE.into());
1121            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
1122            builder.finish_node();
1123        }
1124        emit_table_caption(builder, lines, cap_start, cap_end, config);
1125    }
1126
1127    builder.finish_node(); // PipeTable
1128
1129    // Calculate lines consumed
1130    let table_start = caption_before
1131        .map(|(start, _)| start)
1132        .unwrap_or(actual_start);
1133    let table_end = if let Some((_, cap_end)) = caption_after {
1134        cap_end
1135    } else {
1136        end_pos
1137    };
1138
1139    Some(table_end - table_start)
1140}
1141
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `$parser` over the given lines, starting at line `$start`, with a
    /// fresh builder and default options, and evaluates to the parser's result
    /// (the number of consumed lines, if a table was recognised).
    macro_rules! consumed_lines {
        ($parser:ident, $start:expr, [$($line:expr),+ $(,)?]) => {{
            let input = vec![$($line),+];
            let mut builder = GreenNodeBuilder::new();
            $parser(&input, $start, &mut builder, &ParserOptions::default())
        }};
    }

    // Simple table tests

    #[test]
    fn test_separator_detection() {
        let cases = [
            ("------- ------ ----------   -------", true),
            ("  ---  ---  ---", true),
            ("-------", false), // horizontal rule
            ("--- --- ---", true), // table separator
        ];
        for (line, is_separator) in cases {
            assert_eq!(
                try_parse_table_separator(line).is_some(),
                is_separator,
                "line: {line:?}"
            );
        }
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let consumed = consumed_lines!(
            try_parse_simple_table,
            0,
            [
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ]
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let consumed = consumed_lines!(
            try_parse_simple_table,
            0,
            [
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ]
        );

        // sep + 2 rows
        assert_eq!(consumed, Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        let cases = [
            ("Table: My caption", true),
            ("table: My caption", true),
            (": My caption", true),
            (":", false), // Just colon, no content
            ("Not a caption", false),
        ];
        for (line, is_caption) in cases {
            assert_eq!(
                try_parse_caption_prefix(line).is_some(),
                is_caption,
                "line: {line:?}"
            );
        }
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let tree = crate::parse("Term\n: ```\n  code\n  ```\n", None);
        let contains = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            contains(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            contains(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !contains(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let consumed = consumed_lines!(
            try_parse_simple_table,
            0,
            [
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
                "Table: Demonstration of simple table syntax.",
                "",
            ]
        );

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let consumed = consumed_lines!(
            try_parse_simple_table,
            2,
            [
                "Table: Demonstration of simple table syntax.",
                "",
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ]
        );

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let consumed = consumed_lines!(
            try_parse_simple_table,
            0,
            [
                "  Right     Left",
                "-------     ------",
                "     12     12",
                "",
                ": Short caption",
                "",
            ]
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let consumed = consumed_lines!(
            try_parse_simple_table,
            0,
            [
                "  Right     Left",
                "-------     ------",
                "     12     12",
                "",
                "Table: This is a longer caption",
                "that spans multiple lines.",
                "",
            ]
        );

        // Should consume through end of multi-line caption
        assert_eq!(consumed, Some(6));
    }

    // Pipe table tests

    #[test]
    fn test_pipe_separator_detection() {
        let cases = [
            ("|------:|:-----|---------|:------:|", true),
            ("|---|---|", true),
            ("-----|-----:", true),    // No leading pipe
            ("|-----+-------|", true), // Orgtbl variant
            ("not a separator", false),
        ];
        for (line, is_separator) in cases {
            assert_eq!(
                try_parse_pipe_separator(line).is_some(),
                is_separator,
                "line: {line:?}"
            );
        }
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        let expected = [
            Alignment::Right,
            Alignment::Left,
            Alignment::Default,
            Alignment::Center,
        ];

        assert_eq!(aligns.len(), expected.len());
        for (idx, alignment) in expected.into_iter().enumerate() {
            assert_eq!(aligns[idx], alignment);
        }
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let cells = parse_pipe_table_row("| Right | Left | Center |");
        let expected = ["Right", "Left", "Center"];

        assert_eq!(cells.len(), expected.len());
        for (idx, text) in expected.into_iter().enumerate() {
            assert_eq!(cells[idx], text);
        }

        // Without leading/trailing pipes
        assert_eq!(parse_pipe_table_row("Right | Left | Center").len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let consumed = consumed_lines!(
            try_parse_pipe_table,
            1,
            [
                "",
                "| Right | Left | Center |",
                "|------:|:-----|:------:|",
                "|   12  |  12  |   12   |",
                "|  123  |  123 |  123   |",
                "",
            ]
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let consumed = consumed_lines!(
            try_parse_pipe_table,
            1,
            [
                "",
                "fruit| price",
                "-----|-----:",
                "apple|2.05",
                "pear|1.37",
                "",
            ]
        );

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let consumed = consumed_lines!(
            try_parse_pipe_table,
            1,
            [
                "",
                "| Col1 | Col2 |",
                "|------|------|",
                "| A    | B    |",
                "",
                "Table: My pipe table",
                "",
            ]
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }
}
1392
1393// ============================================================================
1394// Grid Table Parsing
1395// ============================================================================
1396
/// Column information for grid tables, read from one segment of a separator line.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters between the two enclosing `+` signs
    /// (alignment colons included).
    width: usize,
}

/// Parse a grid table separator line such as `+-----+:===:+`.
///
/// The line may be indented by at most three bytes of whitespace, and (ignoring
/// surrounding whitespace) must start and end with `+`. Each non-empty segment
/// between two `+` signs must consist of a single repeated fill character, `-`
/// or `=`, optionally wrapped in `:` alignment markers; empty segments (`++`)
/// are ignored.
///
/// Returns one [`GridColumn`] per non-empty segment, or `None` if the line is
/// not a valid separator or contains no column at all.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    let unindented = line.trim_start();

    // More than three leading whitespace bytes would make this a code block.
    if line.len() - unindented.len() > 3 {
        return None;
    }

    // Peel off the outer `+` borders; what remains are the column segments
    // joined by the inner `+` signs.
    let interior = unindented
        .trim_end()
        .strip_prefix('+')?
        .strip_suffix('+')?;

    let mut columns = Vec::new();
    for segment in interior.split('+').filter(|segment| !segment.is_empty()) {
        // The fill is whatever is left once the alignment colons are removed;
        // a segment made only of colons has no fill and is invalid.
        let fill = segment.trim_matches(':');
        let marker = fill.chars().next()?;

        let is_uniform = fill.chars().all(|c| c == marker);
        if !matches!(marker, '-' | '=') || !is_uniform {
            return None;
        }

        columns.push(GridColumn {
            is_header_separator: marker == '=',
            width: segment.chars().count(),
        });
    }

    (!columns.is_empty()).then_some(columns)
}
1471
1472fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1473    let mut end_byte = start_byte;
1474    let mut display_cols = 0usize;
1475
1476    for (offset, ch) in line[start_byte..].char_indices() {
1477        if ch == '|' {
1478            let sep_byte = start_byte + offset;
1479            return (sep_byte, sep_byte + 1);
1480        }
1481        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1482        if display_cols + ch_width > width {
1483            break;
1484        }
1485        display_cols += ch_width;
1486        end_byte = start_byte + offset + ch.len_utf8();
1487        if display_cols >= width {
1488            break;
1489        }
1490    }
1491
1492    // If the width budget is exhausted before seeing a separator (for example
1493    // because of padding/layout drift), advance to the next literal separator
1494    // to keep row slicing aligned and preserve losslessness.
1495    let mut sep_byte = end_byte;
1496    while sep_byte < line.len() {
1497        let mut chars = line[sep_byte..].chars();
1498        let Some(ch) = chars.next() else {
1499            break;
1500        };
1501        if ch == '|' {
1502            return (sep_byte, sep_byte + 1);
1503        }
1504        sep_byte += ch.len_utf8();
1505    }
1506
1507    (end_byte, end_byte)
1508}
1509
/// Tell whether `line` looks like a content row of a grid table.
///
/// A content row is indented by at most three bytes of whitespace, begins with
/// `|`, and (ignoring trailing whitespace) ends with either `|` for a normal
/// row or `+` for a spanning-style continuation line.
fn is_grid_content_row(line: &str) -> bool {
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();
    let row = unindented.trim_end();

    indent <= 3 && row.starts_with('|') && row.ends_with(['|', '+'])
}
1523
1524/// Extract cell contents from a single grid table row line.
1525/// Returns a vector of cell contents (trimmed) based on column boundaries.
1526/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1527fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1528    let (line_content, _) = strip_newline(line);
1529    let line_trimmed = line_content.trim();
1530
1531    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1532        return vec![String::new(); _columns.len()];
1533    }
1534
1535    let mut cells = Vec::with_capacity(_columns.len());
1536    let mut pos_byte = 1; // Skip leading pipe
1537
1538    for col in _columns {
1539        let col_idx = cells.len();
1540        if pos_byte >= line_trimmed.len() {
1541            cells.push(String::new());
1542            continue;
1543        }
1544
1545        let start_byte = pos_byte;
1546        let end_byte = if col_idx + 1 == _columns.len() {
1547            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1548        } else {
1549            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1550            pos_byte = next_start;
1551            end
1552        };
1553        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1554        if col_idx + 1 == _columns.len() {
1555            pos_byte = line_trimmed.len();
1556        }
1557    }
1558
1559    cells
1560}
1561
1562/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1563/// Concatenates cell contents across lines with newlines, then trims.
1564fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1565    if lines.is_empty() {
1566        return vec![String::new(); columns.len()];
1567    }
1568
1569    extract_grid_cells_from_line(lines[0], columns)
1570}
1571
1572/// Emit a grid table row with inline-parsed cells.
1573/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1574/// then continuation lines as raw TEXT for losslessness.
1575fn emit_grid_table_row(
1576    builder: &mut GreenNodeBuilder<'static>,
1577    lines: &[&str],
1578    columns: &[GridColumn],
1579    row_kind: SyntaxKind,
1580    config: &ParserOptions,
1581) {
1582    if lines.is_empty() {
1583        return;
1584    }
1585
1586    // Extract cell contents from the first line.
1587    let cell_contents = extract_grid_cells_multiline(lines, columns);
1588
1589    builder.start_node(row_kind.into());
1590
1591    // Emit first line with TABLE_CELL nodes
1592    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1593    let first_line = lines[0];
1594    let (line_without_newline, newline_str) = strip_newline(first_line);
1595    let trimmed = line_without_newline.trim();
1596    let expected_pipe_count = columns.len().saturating_add(1);
1597    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1598
1599    // Rows that don't contain all expected column separators (spanning-style rows)
1600    // must be emitted verbatim for losslessness.
1601    if actual_pipe_count != expected_pipe_count {
1602        emit_line_tokens(builder, first_line);
1603        for line in lines.iter().skip(1) {
1604            emit_line_tokens(builder, line);
1605        }
1606        builder.finish_node();
1607        return;
1608    }
1609
1610    // Emit leading whitespace
1611    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1612    if leading_ws_len > 0 {
1613        builder.token(
1614            SyntaxKind::WHITESPACE.into(),
1615            &line_without_newline[..leading_ws_len],
1616        );
1617    }
1618
1619    // Emit leading pipe
1620    if trimmed.starts_with('|') {
1621        builder.token(SyntaxKind::TEXT.into(), "|");
1622    }
1623
1624    // Emit each cell based on fixed column widths from separators
1625    let mut pos_byte = 1usize; // after leading pipe
1626    for (idx, cell_content) in cell_contents.iter().enumerate() {
1627        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1628            let start_byte = pos_byte;
1629            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1630                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1631            } else {
1632                let (end, next_start) =
1633                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1634                pos_byte = next_start;
1635                end
1636            };
1637            let slice = &trimmed[start_byte..end_byte];
1638            if idx + 1 == columns.len() {
1639                pos_byte = trimmed.len();
1640            }
1641            slice
1642        } else {
1643            ""
1644        };
1645
1646        // Emit leading whitespace in cell
1647        let cell_trimmed = part.trim();
1648        let ws_start_len = part.len() - part.trim_start().len();
1649        if ws_start_len > 0 {
1650            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1651        }
1652
1653        // Emit TABLE_CELL with inline parsing
1654        emit_table_cell(builder, cell_content, config);
1655
1656        // Emit trailing whitespace in cell
1657        let ws_end_start = ws_start_len + cell_trimmed.len();
1658        if ws_end_start < part.len() {
1659            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1660        }
1661
1662        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1663        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1664            builder.token(SyntaxKind::TEXT.into(), "|");
1665        }
1666    }
1667
1668    // Emit trailing whitespace before newline
1669    let trailing_ws_start = leading_ws_len + trimmed.len();
1670    if trailing_ws_start < line_without_newline.len() {
1671        builder.token(
1672            SyntaxKind::WHITESPACE.into(),
1673            &line_without_newline[trailing_ws_start..],
1674        );
1675    }
1676
1677    // Emit newline
1678    if !newline_str.is_empty() {
1679        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1680    }
1681
1682    // Emit continuation lines as TEXT for losslessness
1683    for line in lines.iter().skip(1) {
1684        emit_line_tokens(builder, line);
1685    }
1686
1687    builder.finish_node();
1688}
1689
1690/// Try to parse a grid table starting at the given position.
1691/// Returns the number of lines consumed if successful.
1692pub(crate) fn try_parse_grid_table(
1693    lines: &[&str],
1694    start_pos: usize,
1695    builder: &mut GreenNodeBuilder<'static>,
1696    config: &ParserOptions,
1697) -> Option<usize> {
1698    if start_pos >= lines.len() {
1699        return None;
1700    }
1701
1702    // Check if this line is a caption followed by a table
1703    // If so, the actual table starts after the caption and blank line
1704    let (actual_start, has_caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1705        // Skip caption line
1706        let mut pos = start_pos + 1;
1707        // Skip blank line if present
1708        while pos < lines.len() && lines[pos].trim().is_empty() {
1709            pos += 1;
1710        }
1711        (pos, true)
1712    } else {
1713        (start_pos, false)
1714    };
1715
1716    if actual_start >= lines.len() {
1717        return None;
1718    }
1719
1720    // First line must be a grid separator
1721    let first_line = lines[actual_start];
1722    let _columns = try_parse_grid_separator(first_line)?;
1723
1724    // Track table structure
1725    let mut end_pos = actual_start + 1;
1726    let mut found_header_sep = false;
1727    let mut in_footer = false;
1728
1729    // Scan table lines
1730    while end_pos < lines.len() {
1731        let line = lines[end_pos];
1732
1733        // Check for blank line (table ends)
1734        if line.trim().is_empty() {
1735            break;
1736        }
1737
1738        // Check for separator line
1739        if let Some(sep_cols) = try_parse_grid_separator(line) {
1740            // Check if this is a header separator (=)
1741            if sep_cols.iter().any(|c| c.is_header_separator) {
1742                if !found_header_sep {
1743                    found_header_sep = true;
1744                } else if !in_footer {
1745                    // Second = separator starts footer
1746                    in_footer = true;
1747                }
1748            }
1749            end_pos += 1;
1750            continue;
1751        }
1752
1753        // Check for content row
1754        if is_grid_content_row(line) {
1755            end_pos += 1;
1756            continue;
1757        }
1758
1759        // Not a valid grid table line - table ends
1760        break;
1761    }
1762
1763    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1764    // Or just top + content rows that end with a separator
1765    if end_pos <= actual_start + 1 {
1766        return None;
1767    }
1768
1769    // Last consumed line should be a separator for a well-formed table
1770    // But we'll be lenient and accept tables ending with content rows
1771
1772    // Check for caption before table (only if we didn't already detected it)
1773    let caption_before = if has_caption_before {
1774        Some((start_pos, start_pos + 1)) // Single-line caption detected earlier
1775    } else {
1776        find_caption_before_table(lines, actual_start)
1777    };
1778
1779    // Check for caption after table
1780    let caption_after = find_caption_after_table(lines, end_pos);
1781
1782    // Build the grid table
1783    builder.start_node(SyntaxKind::GRID_TABLE.into());
1784
1785    // Emit caption before if present
1786    if let Some((cap_start, cap_end)) = caption_before {
1787        emit_table_caption(builder, lines, cap_start, cap_end, config);
1788        // Emit blank line between caption and table if present
1789        if cap_end < actual_start {
1790            for line in lines.iter().take(actual_start).skip(cap_end) {
1791                if line.trim().is_empty() {
1792                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1793                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1794                    builder.finish_node();
1795                }
1796            }
1797        }
1798    }
1799
1800    // Track whether we've passed the header separator
1801    let mut past_header_sep = false;
1802    let mut in_footer_section = false;
1803    let mut current_row_lines: Vec<&str> = Vec::new();
1804    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1805
1806    // Emit table rows - accumulate multi-line cells
1807    for line in lines.iter().take(end_pos).skip(actual_start) {
1808        if let Some(sep_cols) = try_parse_grid_separator(line) {
1809            // Separator line - emit any accumulated row first
1810            if !current_row_lines.is_empty() {
1811                emit_grid_table_row(
1812                    builder,
1813                    &current_row_lines,
1814                    &sep_cols,
1815                    current_row_kind,
1816                    config,
1817                );
1818                current_row_lines.clear();
1819            }
1820
1821            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1822
1823            if is_header_sep {
1824                if !past_header_sep {
1825                    // This is the header/body separator
1826                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1827                    emit_line_tokens(builder, line);
1828                    builder.finish_node();
1829                    past_header_sep = true;
1830                } else {
1831                    // Footer separator
1832                    if !in_footer_section {
1833                        in_footer_section = true;
1834                    }
1835                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1836                    emit_line_tokens(builder, line);
1837                    builder.finish_node();
1838                }
1839            } else {
1840                // Regular separator (row boundary)
1841                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1842                emit_line_tokens(builder, line);
1843                builder.finish_node();
1844            }
1845        } else if is_grid_content_row(line) {
1846            // Content row - accumulate for multi-line cells
1847            current_row_kind = if !past_header_sep && found_header_sep {
1848                SyntaxKind::TABLE_HEADER
1849            } else if in_footer_section {
1850                SyntaxKind::TABLE_FOOTER
1851            } else {
1852                SyntaxKind::TABLE_ROW
1853            };
1854
1855            current_row_lines.push(line);
1856        }
1857    }
1858
1859    // Emit any remaining accumulated row
1860    if !current_row_lines.is_empty() {
1861        // Use first separator's columns for cell boundaries
1862        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
1863            emit_grid_table_row(
1864                builder,
1865                &current_row_lines,
1866                &sep_cols,
1867                current_row_kind,
1868                config,
1869            );
1870        }
1871    }
1872
1873    // Emit caption after if present
1874    if let Some((cap_start, cap_end)) = caption_after {
1875        if cap_start > end_pos {
1876            builder.start_node(SyntaxKind::BLANK_LINE.into());
1877            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
1878            builder.finish_node();
1879        }
1880        emit_table_caption(builder, lines, cap_start, cap_end, config);
1881    }
1882
1883    builder.finish_node(); // GRID_TABLE
1884
1885    // Calculate lines consumed
1886    let table_start = caption_before
1887        .map(|(start, _)| start)
1888        .unwrap_or(actual_start);
1889    let table_end = if let Some((_, cap_end)) = caption_after {
1890        cap_end
1891    } else {
1892        end_pos
1893    };
1894
1895    Some(table_end - table_start)
1896}
1897
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Runs the grid-table parser over `input` from line `start` with default
    /// options and returns the number of lines it reports as consumed.
    fn consumed_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let accepted = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in accepted {
            assert!(
                try_parse_grid_separator(line).is_some(),
                "expected a grid separator: {line:?}"
            );
        }

        let rejected = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in rejected {
            assert!(
                try_parse_grid_separator(line).is_none(),
                "expected no grid separator: {line:?}"
            );
        }
    }

    #[test]
    fn test_grid_header_separator() {
        // A `=` border marks every column as a header separator...
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|col| col.is_header_separator));

        // ...while a `-` border marks none of them.
        let plain_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(plain_cols.iter().all(|col| !col.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        let content_rows = ["| content | content |", "|  |  |", "| content +------+"];
        for line in content_rows {
            assert!(is_grid_content_row(line), "expected content row: {line:?}");
        }

        let other_lines = [
            "+---+---+", // separator, not content
            "no pipes here",
        ];
        for line in other_lines {
            assert!(!is_grid_content_row(line), "expected non-content: {line:?}");
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts at the table itself; the count should still include
        // caption + blank + table.
        assert_eq!(consumed_lines(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(&input, 0), Some(7));
    }
}
2052
2053// ============================================================================
2054// Multiline Table Parsing
2055// ============================================================================
2056
2057/// Check if a line is a multiline table separator (continuous dashes).
2058/// Multiline table separators span the full width and are all dashes.
2059/// Returns Some(columns) if valid, None otherwise.
2060fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2061    let trimmed = line.trim_start();
2062    let leading_spaces = line.len() - trimmed.len();
2063
2064    // Must have leading spaces <= 3 to not be a code block
2065    if leading_spaces > 3 {
2066        return None;
2067    }
2068
2069    let trimmed = trimmed.trim_end();
2070
2071    // Must be all dashes (continuous line of dashes)
2072    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2073        return None;
2074    }
2075
2076    // Must have at least 3 dashes
2077    if trimmed.len() < 3 {
2078        return None;
2079    }
2080
2081    // This is a full-width separator - columns will be determined by column separator lines
2082    Some(vec![Column {
2083        start: leading_spaces,
2084        end: leading_spaces + trimmed.len(),
2085        alignment: Alignment::Default,
2086    }])
2087}
2088
2089/// Check if a line is a column separator line for multiline tables.
2090/// Column separators have dashes with spaces between them to define columns.
2091fn is_column_separator(line: &str) -> bool {
2092    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2093}
2094
2095fn is_headerless_single_row_without_blank(
2096    lines: &[&str],
2097    row_start: usize,
2098    row_end: usize,
2099    columns: &[Column],
2100) -> bool {
2101    if row_start >= row_end {
2102        return false;
2103    }
2104
2105    if row_end - row_start == 1 {
2106        return false;
2107    }
2108
2109    let Some(last_col) = columns.last() else {
2110        return false;
2111    };
2112
2113    for line in lines.iter().take(row_end).skip(row_start + 1) {
2114        let (content, _) = strip_newline(line);
2115        let prefix_end = last_col.start.min(content.len());
2116        if !content[..prefix_end].trim().is_empty() {
2117            return false;
2118        }
2119    }
2120
2121    true
2122}
2123
2124/// Try to parse a multiline table starting at the given position.
2125/// Returns the number of lines consumed if successful.
2126pub(crate) fn try_parse_multiline_table(
2127    lines: &[&str],
2128    start_pos: usize,
2129    builder: &mut GreenNodeBuilder<'static>,
2130    config: &ParserOptions,
2131) -> Option<usize> {
2132    if start_pos >= lines.len() {
2133        return None;
2134    }
2135
2136    let first_line = lines[start_pos];
2137
2138    // First line can be either:
2139    // 1. A full-width dash separator (for tables with headers)
2140    // 2. A column separator (for headerless tables)
2141    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2142    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2143    let headerless_columns = if is_column_sep_start {
2144        try_parse_table_separator(first_line)
2145    } else {
2146        None
2147    };
2148
2149    if !is_full_width_start && !is_column_sep_start {
2150        return None;
2151    }
2152
2153    // Look ahead to find the structure
2154    let mut pos = start_pos + 1;
2155    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2156    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2157    let mut has_header = false;
2158    let mut found_blank_line = false;
2159    let mut found_closing_sep = false;
2160    let mut content_line_count = 0usize;
2161
2162    // Scan for header section and column separator
2163    while pos < lines.len() {
2164        let line = lines[pos];
2165
2166        // Check for column separator (defines columns) - only if we started with full-width
2167        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2168            found_column_sep = true;
2169            column_sep_pos = pos;
2170            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2171            pos += 1;
2172            continue;
2173        }
2174
2175        // Check for blank line (row separator in body)
2176        if line.trim().is_empty() {
2177            found_blank_line = true;
2178            pos += 1;
2179            // Check if next line is a valid closing separator for this table shape.
2180            if pos < lines.len() {
2181                let next = lines[pos];
2182                let is_valid_closer = if is_full_width_start {
2183                    try_parse_multiline_separator(next).is_some()
2184                } else {
2185                    is_column_separator(next)
2186                };
2187                if is_valid_closer {
2188                    found_closing_sep = true;
2189                    pos += 1; // Include the closing separator
2190                    break;
2191                }
2192            }
2193            continue;
2194        }
2195
2196        // Check for closing full-width dashes (only for full-width-start tables).
2197        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2198            found_closing_sep = true;
2199            pos += 1;
2200            break;
2201        }
2202
2203        // Check for closing column separator (for headerless tables)
2204        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2205            found_closing_sep = true;
2206            pos += 1;
2207            break;
2208        }
2209
2210        // Content row
2211        content_line_count += 1;
2212        pos += 1;
2213    }
2214
2215    // Must have found a column separator to be a valid multiline table
2216    if !found_column_sep {
2217        return None;
2218    }
2219
2220    // Must have had at least one blank line between rows (distinguishes from simple tables)
2221    if !found_blank_line {
2222        if !is_column_sep_start {
2223            return None;
2224        }
2225        let columns = headerless_columns.as_deref()?;
2226        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2227            return None;
2228        }
2229    }
2230
2231    // Must have a closing separator
2232    if !found_closing_sep {
2233        return None;
2234    }
2235
2236    // Must have consumed more than just the opening separator
2237    if pos <= start_pos + 2 {
2238        return None;
2239    }
2240
2241    let end_pos = pos;
2242
2243    // Extract column boundaries from the separator line
2244    let columns =
2245        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2246
2247    // Check for caption before table
2248    let caption_before = find_caption_before_table(lines, start_pos);
2249
2250    // Check for caption after table
2251    let caption_after = find_caption_after_table(lines, end_pos);
2252
2253    // Build the multiline table
2254    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2255
2256    // Emit caption before if present
2257    if let Some((cap_start, cap_end)) = caption_before {
2258        emit_table_caption(builder, lines, cap_start, cap_end, config);
2259
2260        // Emit blank line between caption and table if present
2261        if cap_end < start_pos {
2262            for line in lines.iter().take(start_pos).skip(cap_end) {
2263                if line.trim().is_empty() {
2264                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2265                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2266                    builder.finish_node();
2267                }
2268            }
2269        }
2270    }
2271
2272    // Emit opening separator
2273    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2274    emit_line_tokens(builder, lines[start_pos]);
2275    builder.finish_node();
2276
2277    // Track state for emitting
2278    let mut in_header = has_header;
2279    let mut current_row_lines: Vec<&str> = Vec::new();
2280
2281    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2282        // Column separator (header/body divider)
2283        if i == column_sep_pos {
2284            // Emit any accumulated header lines
2285            if !current_row_lines.is_empty() {
2286                emit_multiline_table_row(
2287                    builder,
2288                    &current_row_lines,
2289                    &columns,
2290                    SyntaxKind::TABLE_HEADER,
2291                    config,
2292                );
2293                current_row_lines.clear();
2294            }
2295
2296            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2297            emit_line_tokens(builder, line);
2298            builder.finish_node();
2299            in_header = false;
2300            continue;
2301        }
2302
2303        // Closing separator (full-width or column separator at end)
2304        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2305            // Emit any accumulated row lines
2306            if !current_row_lines.is_empty() {
2307                let kind = if in_header {
2308                    SyntaxKind::TABLE_HEADER
2309                } else {
2310                    SyntaxKind::TABLE_ROW
2311                };
2312                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2313                current_row_lines.clear();
2314            }
2315
2316            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2317            emit_line_tokens(builder, line);
2318            builder.finish_node();
2319            continue;
2320        }
2321
2322        // Blank line (row separator)
2323        if line.trim().is_empty() {
2324            // Emit accumulated row
2325            if !current_row_lines.is_empty() {
2326                let kind = if in_header {
2327                    SyntaxKind::TABLE_HEADER
2328                } else {
2329                    SyntaxKind::TABLE_ROW
2330                };
2331                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2332                current_row_lines.clear();
2333            }
2334
2335            builder.start_node(SyntaxKind::BLANK_LINE.into());
2336            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
2337            builder.finish_node();
2338            continue;
2339        }
2340
2341        // Content line - accumulate for current row
2342        current_row_lines.push(line);
2343    }
2344
2345    // Emit any remaining accumulated lines
2346    if !current_row_lines.is_empty() {
2347        let kind = if in_header {
2348            SyntaxKind::TABLE_HEADER
2349        } else {
2350            SyntaxKind::TABLE_ROW
2351        };
2352        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2353    }
2354
2355    // Emit caption after if present
2356    if let Some((cap_start, cap_end)) = caption_after {
2357        if cap_start > end_pos {
2358            builder.start_node(SyntaxKind::BLANK_LINE.into());
2359            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
2360            builder.finish_node();
2361        }
2362        emit_table_caption(builder, lines, cap_start, cap_end, config);
2363    }
2364
2365    builder.finish_node(); // MultilineTable
2366
2367    // Calculate lines consumed
2368    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2369    let table_end = if let Some((_, cap_end)) = caption_after {
2370        cap_end
2371    } else {
2372        end_pos
2373    };
2374
2375    Some(table_end - table_start)
2376}
2377
2378/// Extract cell contents from first line only (for CST emission).
2379/// Multi-line content will be in continuation TEXT tokens.
2380fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2381    let (line_content, _) = strip_newline(line);
2382    let mut cells = Vec::new();
2383
2384    for column in columns.iter() {
2385        // Extract FULL text for this column (including whitespace)
2386        let cell_text = if column.end <= line_content.len() {
2387            &line_content[column.start..column.end]
2388        } else if column.start < line_content.len() {
2389            &line_content[column.start..]
2390        } else {
2391            ""
2392        };
2393
2394        cells.push(cell_text.to_string());
2395    }
2396
2397    cells
2398}
2399
2400/// Emit a multiline table row with inline parsing (Phase 7.1).
2401fn emit_multiline_table_row(
2402    builder: &mut GreenNodeBuilder<'static>,
2403    lines: &[&str],
2404    columns: &[Column],
2405    kind: SyntaxKind,
2406    config: &ParserOptions,
2407) {
2408    if lines.is_empty() {
2409        return;
2410    }
2411
2412    // Extract cell contents from first line only (for CST losslessness)
2413    let first_line = lines[0];
2414    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2415
2416    builder.start_node(kind.into());
2417
2418    // Emit first line with TABLE_CELL nodes
2419    let (trimmed, newline_str) = strip_newline(first_line);
2420    let mut current_pos = 0;
2421
2422    for (col_idx, column) in columns.iter().enumerate() {
2423        let cell_text = &cell_contents[col_idx];
2424        let cell_start = column.start.min(trimmed.len());
2425        let cell_end = column.end.min(trimmed.len());
2426
2427        // Emit whitespace before cell
2428        if current_pos < cell_start {
2429            builder.token(
2430                SyntaxKind::WHITESPACE.into(),
2431                &trimmed[current_pos..cell_start],
2432            );
2433        }
2434
2435        // Emit cell with inline parsing (first line content only)
2436        emit_table_cell(builder, cell_text, config);
2437
2438        current_pos = cell_end;
2439    }
2440
2441    // Emit trailing whitespace
2442    if current_pos < trimmed.len() {
2443        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2444    }
2445
2446    // Emit newline
2447    if !newline_str.is_empty() {
2448        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2449    }
2450
2451    // Emit continuation lines as TEXT to preserve exact line structure
2452    for line in lines.iter().skip(1) {
2453        emit_line_tokens(builder, line);
2454    }
2455
2456    builder.finish_node();
2457}
2458
2459#[cfg(test)]
2460mod multiline_table_tests {
2461    use super::*;
2462    use crate::syntax::SyntaxNode;
2463
2464    #[test]
2465    fn test_multiline_separator_detection() {
2466        assert!(
2467            try_parse_multiline_separator(
2468                "-------------------------------------------------------------"
2469            )
2470            .is_some()
2471        );
2472        assert!(try_parse_multiline_separator("---").is_some());
2473        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces
2474        assert!(try_parse_multiline_separator("--").is_none()); // too short
2475        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
2476        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
2477    }
2478
2479    #[test]
2480    fn test_basic_multiline_table() {
2481        let input = vec![
2482            "-------------------------------------------------------------",
2483            " Centered   Default           Right Left",
2484            "  Header    Aligned         Aligned Aligned",
2485            "----------- ------- --------------- -------------------------",
2486            "   First    row                12.0 Example of a row that",
2487            "                                    spans multiple lines.",
2488            "",
2489            "  Second    row                 5.0 Here's another one.",
2490            "-------------------------------------------------------------",
2491            "",
2492        ];
2493
2494        let mut builder = GreenNodeBuilder::new();
2495        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2496
2497        assert!(result.is_some());
2498        assert_eq!(result.unwrap(), 9);
2499    }
2500
2501    #[test]
2502    fn test_multiline_table_headerless() {
2503        let input = vec![
2504            "----------- ------- --------------- -------------------------",
2505            "   First    row                12.0 Example of a row that",
2506            "                                    spans multiple lines.",
2507            "",
2508            "  Second    row                 5.0 Here's another one.",
2509            "----------- ------- --------------- -------------------------",
2510            "",
2511        ];
2512
2513        let mut builder = GreenNodeBuilder::new();
2514        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2515
2516        assert!(result.is_some());
2517        assert_eq!(result.unwrap(), 6);
2518    }
2519
2520    #[test]
2521    fn test_multiline_table_headerless_single_line_is_not_multiline() {
2522        let input = vec![
2523            "-------     ------ ----------   -------",
2524            "     12     12        12             12",
2525            "-------     ------ ----------   -------",
2526            "",
2527            "Not part of table.",
2528            "",
2529        ];
2530
2531        let mut builder = GreenNodeBuilder::new();
2532        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2533
2534        assert!(result.is_none());
2535    }
2536
2537    #[test]
2538    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
2539        let input = vec![
2540            "----------  ---------  -----------  ---------------------------",
2541            "   First    row               12.0  Example of a row that spans",
2542            "                                    multiple lines.",
2543            "----------  ---------  -----------  ---------------------------",
2544            "",
2545        ];
2546
2547        let mut builder = GreenNodeBuilder::new();
2548        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2549
2550        assert!(result.is_some());
2551        assert_eq!(result.unwrap(), 4);
2552    }
2553
2554    #[test]
2555    fn test_multiline_table_with_caption() {
2556        let input = vec![
2557            "-------------------------------------------------------------",
2558            " Col1       Col2",
2559            "----------- -------",
2560            "   A        B",
2561            "",
2562            "-------------------------------------------------------------",
2563            "",
2564            "Table: Here's the caption.",
2565            "",
2566        ];
2567
2568        let mut builder = GreenNodeBuilder::new();
2569        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2570
2571        assert!(result.is_some());
2572        // table (6 lines) + blank + caption
2573        assert_eq!(result.unwrap(), 8);
2574    }
2575
2576    #[test]
2577    fn test_multiline_table_single_row() {
2578        let input = vec![
2579            "---------------------------------------------",
2580            " Header1    Header2",
2581            "----------- -----------",
2582            "   Data     More data",
2583            "",
2584            "---------------------------------------------",
2585            "",
2586        ];
2587
2588        let mut builder = GreenNodeBuilder::new();
2589        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2590
2591        assert!(result.is_some());
2592        assert_eq!(result.unwrap(), 6);
2593    }
2594
2595    #[test]
2596    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
2597        let input = vec![
2598            "- - - - -",
2599            "Third section with underscores.",
2600            "",
2601            "_____",
2602            "",
2603            "> Quote before rule",
2604            ">",
2605            "> ***",
2606            ">",
2607            "> Quote after rule",
2608            "",
2609            "Final paragraph.",
2610            "",
2611            "Here's a horizontal rule:",
2612            "",
2613            "---",
2614            "Text directly after the horizontal rule.",
2615            "",
2616        ];
2617
2618        let mut builder = GreenNodeBuilder::new();
2619        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2620
2621        assert!(result.is_none());
2622    }
2623
2624    #[test]
2625    fn test_not_multiline_table() {
2626        // Simple table should not be parsed as multiline
2627        let input = vec![
2628            "  Right     Left     Center     Default",
2629            "-------     ------ ----------   -------",
2630            "     12     12        12            12",
2631            "",
2632        ];
2633
2634        let mut builder = GreenNodeBuilder::new();
2635        let result = try_parse_multiline_table(&input, 0, &mut builder, &ParserOptions::default());
2636
2637        // Should not parse because first line isn't a full-width separator
2638        assert!(result.is_none());
2639    }
2640
2641    // Phase 7.1: Unit tests for emit_table_cell() helper
2642    #[test]
2643    fn test_emit_table_cell_plain_text() {
2644        let mut builder = GreenNodeBuilder::new();
2645        emit_table_cell(&mut builder, "Cell", &ParserOptions::default());
2646        let green = builder.finish();
2647        let node = SyntaxNode::new_root(green);
2648
2649        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2650        assert_eq!(node.text(), "Cell");
2651
2652        // Should have TEXT child
2653        let children: Vec<_> = node.children_with_tokens().collect();
2654        assert_eq!(children.len(), 1);
2655        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
2656    }
2657
2658    #[test]
2659    fn test_emit_table_cell_with_emphasis() {
2660        let mut builder = GreenNodeBuilder::new();
2661        emit_table_cell(&mut builder, "*italic*", &ParserOptions::default());
2662        let green = builder.finish();
2663        let node = SyntaxNode::new_root(green);
2664
2665        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2666        assert_eq!(node.text(), "*italic*");
2667
2668        // Should have EMPHASIS child
2669        let children: Vec<_> = node.children().collect();
2670        assert_eq!(children.len(), 1);
2671        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
2672    }
2673
2674    #[test]
2675    fn test_emit_table_cell_with_code() {
2676        let mut builder = GreenNodeBuilder::new();
2677        emit_table_cell(&mut builder, "`code`", &ParserOptions::default());
2678        let green = builder.finish();
2679        let node = SyntaxNode::new_root(green);
2680
2681        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2682        assert_eq!(node.text(), "`code`");
2683
2684        // Should have CODE_SPAN child
2685        let children: Vec<_> = node.children().collect();
2686        assert_eq!(children.len(), 1);
2687        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
2688    }
2689
2690    #[test]
2691    fn test_emit_table_cell_with_link() {
2692        let mut builder = GreenNodeBuilder::new();
2693        emit_table_cell(&mut builder, "[text](url)", &ParserOptions::default());
2694        let green = builder.finish();
2695        let node = SyntaxNode::new_root(green);
2696
2697        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2698        assert_eq!(node.text(), "[text](url)");
2699
2700        // Should have LINK child
2701        let children: Vec<_> = node.children().collect();
2702        assert_eq!(children.len(), 1);
2703        assert_eq!(children[0].kind(), SyntaxKind::LINK);
2704    }
2705
2706    #[test]
2707    fn test_emit_table_cell_with_strong() {
2708        let mut builder = GreenNodeBuilder::new();
2709        emit_table_cell(&mut builder, "**bold**", &ParserOptions::default());
2710        let green = builder.finish();
2711        let node = SyntaxNode::new_root(green);
2712
2713        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2714        assert_eq!(node.text(), "**bold**");
2715
2716        // Should have STRONG child
2717        let children: Vec<_> = node.children().collect();
2718        assert_eq!(children.len(), 1);
2719        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
2720    }
2721
2722    #[test]
2723    fn test_emit_table_cell_mixed_inline() {
2724        let mut builder = GreenNodeBuilder::new();
2725        emit_table_cell(
2726            &mut builder,
2727            "Text **bold** and `code`",
2728            &ParserOptions::default(),
2729        );
2730        let green = builder.finish();
2731        let node = SyntaxNode::new_root(green);
2732
2733        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2734        assert_eq!(node.text(), "Text **bold** and `code`");
2735
2736        // Should have multiple children: TEXT, STRONG, TEXT, CODE_SPAN
2737        let children: Vec<_> = node.children_with_tokens().collect();
2738        assert!(children.len() >= 4);
2739
2740        // Check some expected types
2741        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
2742        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
2743    }
2744
2745    #[test]
2746    fn test_emit_table_cell_empty() {
2747        let mut builder = GreenNodeBuilder::new();
2748        emit_table_cell(&mut builder, "", &ParserOptions::default());
2749        let green = builder.finish();
2750        let node = SyntaxNode::new_root(green);
2751
2752        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2753        assert_eq!(node.text(), "");
2754
2755        // Empty cell should have no children
2756        let children: Vec<_> = node.children_with_tokens().collect();
2757        assert_eq!(children.len(), 0);
2758    }
2759
2760    #[test]
2761    fn test_emit_table_cell_escaped_pipe() {
2762        let mut builder = GreenNodeBuilder::new();
2763        emit_table_cell(&mut builder, r"A \| B", &ParserOptions::default());
2764        let green = builder.finish();
2765        let node = SyntaxNode::new_root(green);
2766
2767        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2768        // The escaped pipe should be preserved
2769        assert_eq!(node.text(), r"A \| B");
2770    }
2771}