Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::config::Config;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
9use crate::parser::utils::inline_emission;
10
/// Horizontal alignment of a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Column content is left-aligned.
    Left,
    /// Column content is right-aligned.
    Right,
    /// Column content is centered.
    Center,
    /// No specific alignment (used when none is indicated or can be inferred).
    Default,
}
18
19/// Column information extracted from the separator line.
20#[derive(Debug, Clone)]
21pub(crate) struct Column {
22    /// Start position (byte index) in the line
23    start: usize,
24    /// End position (byte index) in the line
25    end: usize,
26    /// Column alignment
27    alignment: Alignment,
28}
29
30/// Try to detect if a line is a table separator line.
31/// Returns Some(column positions) if it's a valid separator.
32pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
33    let trimmed = line.trim_start();
34    // Strip trailing newline if present (CRLF or LF)
35    let (trimmed, newline_str) = strip_newline(trimmed);
36    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
37
38    // Must have leading spaces <= 3 to not be a code block
39    if leading_spaces > 3 {
40        return None;
41    }
42
43    // Simple tables only use dashed separators.
44    if trimmed.contains('*') || trimmed.contains('_') {
45        return None;
46    }
47
48    // Must contain at least one dash
49    if !trimmed.contains('-') {
50        return None;
51    }
52
53    // A separator line consists of dashes and spaces
54    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
55        return None;
56    }
57
58    // Must not be a horizontal rule.
59    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
60    if dash_groups.len() <= 1 {
61        return None;
62    }
63
64    // Extract column positions from dash groups
65    let columns = extract_columns(trimmed, leading_spaces);
66
67    if columns.is_empty() {
68        return None;
69    }
70
71    Some(columns)
72}
73
74/// Extract column positions from a separator line.
75fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
76    let mut columns = Vec::new();
77    let mut in_dashes = false;
78    let mut col_start = 0;
79
80    for (i, ch) in separator.char_indices() {
81        match ch {
82            '-' => {
83                if !in_dashes {
84                    col_start = i + offset;
85                    in_dashes = true;
86                }
87            }
88            ' ' => {
89                if in_dashes {
90                    columns.push(Column {
91                        start: col_start,
92                        end: i + offset,
93                        alignment: Alignment::Default, // Will be determined later
94                    });
95                    in_dashes = false;
96                }
97            }
98            _ => {}
99        }
100    }
101
102    // Handle last column
103    if in_dashes {
104        columns.push(Column {
105            start: col_start,
106            end: separator.len() + offset,
107            alignment: Alignment::Default,
108        });
109    }
110
111    columns
112}
113
/// Split a table-caption marker off the start of `line`.
///
/// Recognised markers are `Table:`, `table:`, and a bare `:` that is
/// immediately followed by whitespace. The line may be indented by at most
/// three bytes of whitespace.
///
/// Returns `Some((prefix_len, rest))`, where `prefix_len` is the byte length of
/// indentation plus marker and `rest` is everything after the marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Four or more bytes of indentation would be a code block instead.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows (Pandoc-style); this keeps
    // constructs such as fenced-div fences (":::") from being read as captions.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
142
143/// Check if a line could be the start of a table caption.
144fn is_table_caption_start(line: &str) -> bool {
145    try_parse_caption_prefix(line).is_some()
146}
147
/// Report whether `line`, after leading whitespace, starts with exactly one
/// colon (a `:` that is not immediately followed by another `:`).
fn is_bare_colon_caption_start(line: &str) -> bool {
    match line.trim_start().strip_prefix(':') {
        // "::" (and therefore ":::") is not a bare colon.
        Some(rest) => !rest.starts_with(':'),
        None => false,
    }
}
152
153fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
154    let Some((_, rest)) = try_parse_caption_prefix(line) else {
155        return false;
156    };
157    let trimmed = rest.trim_start();
158    trimmed.starts_with("```") || trimmed.starts_with("~~~")
159}
160
161fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
162    if !is_table_caption_start(lines[pos]) {
163        return false;
164    }
165
166    if is_bare_colon_caption_start(lines[pos])
167        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
168    {
169        return false;
170    }
171
172    // Avoid stealing definition-list definitions (":   ...") as table captions.
173    if is_bare_colon_caption_start(lines[pos]) && pos > 0 && !lines[pos - 1].trim().is_empty() {
174        return false;
175    }
176    true
177}
178
179/// Check if a line could be the start of a grid table.
180/// Grid tables start with a separator line like +---+---+ or +===+===+
181fn is_grid_table_start(line: &str) -> bool {
182    try_parse_grid_separator(line).is_some()
183}
184
185/// Check if a line could be the start of a multiline table.
186/// Multiline tables start with either:
187/// - A full-width dash separator (----)
188/// - A column separator with dashes and spaces (---- ---- ----)
189fn is_multiline_table_start(line: &str) -> bool {
190    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
191}
192
193/// Check if there's a table following a potential caption at this position.
194/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
195pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
196    if caption_pos >= lines.len() {
197        return false;
198    }
199
200    // Caption must start with a caption prefix
201    if !is_valid_caption_start_before_table(lines, caption_pos) {
202        return false;
203    }
204
205    let mut pos = caption_pos + 1;
206
207    // Skip continuation lines of caption (non-blank lines)
208    while pos < lines.len() && !lines[pos].trim().is_empty() {
209        // If we hit a table separator, we found a table
210        if try_parse_table_separator(lines[pos]).is_some() {
211            return true;
212        }
213        pos += 1;
214    }
215
216    // Skip one blank line
217    if pos < lines.len() && lines[pos].trim().is_empty() {
218        pos += 1;
219    }
220
221    // Check for table at next position
222    if pos < lines.len() {
223        let line = lines[pos];
224
225        // Check for grid table start (+---+---+ or +===+===+)
226        if is_grid_table_start(line) {
227            return true;
228        }
229
230        // Check for multiline table start (---- or ---- ---- ----)
231        if is_multiline_table_start(line) {
232            return true;
233        }
234
235        // Could be a separator line (simple/pipe table, headerless)
236        if try_parse_table_separator(line).is_some() {
237            return true;
238        }
239
240        // Or could be a header line followed by separator (simple/pipe table with header)
241        if pos + 1 < lines.len() && !line.trim().is_empty() {
242            let next_line = lines[pos + 1];
243            if try_parse_table_separator(next_line).is_some()
244                || try_parse_pipe_separator(next_line).is_some()
245            {
246                return true;
247            }
248        }
249    }
250
251    false
252}
253
254/// Find caption before table (if any).
255/// Returns (caption_start, caption_end) positions, or None.
256fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
257    if table_start == 0 {
258        return None;
259    }
260
261    // Look backward for a caption
262    // Caption must be immediately before table (with possible blank line between)
263    let mut pos = table_start - 1;
264
265    // Skip one blank line if present
266    if lines[pos].trim().is_empty() {
267        if pos == 0 {
268            return None;
269        }
270        pos -= 1;
271    }
272
273    // Now pos points to the last non-blank line before the table
274    // This could be the last line of a multiline caption, or a single-line caption
275    let caption_end = pos + 1; // End is exclusive
276
277    // If this line is NOT a caption start, it might be a continuation line
278    // Scan backward through non-blank lines to find the caption start
279    if !is_valid_caption_start_before_table(lines, pos) {
280        // Not a caption start - check if there's a caption start above
281        let mut scan_pos = pos;
282        while scan_pos > 0 {
283            scan_pos -= 1;
284            let line = lines[scan_pos];
285
286            // If we hit a blank line, we've gone too far
287            if line.trim().is_empty() {
288                return None;
289            }
290
291            // If we find a caption start, this is the beginning of the multiline caption
292            if is_valid_caption_start_before_table(lines, scan_pos) {
293                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
294                    return None;
295                }
296                if previous_nonblank_looks_like_table(lines, scan_pos) {
297                    return None;
298                }
299                return Some((scan_pos, caption_end));
300            }
301        }
302        // Scanned to beginning without finding caption start
303        None
304    } else {
305        if pos > 0 && !lines[pos - 1].trim().is_empty() {
306            return None;
307        }
308        if previous_nonblank_looks_like_table(lines, pos) {
309            return None;
310        }
311        // This line is a caption start - return the range
312        Some((pos, caption_end))
313    }
314}
315
316fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
317    if pos == 0 {
318        return false;
319    }
320    let mut i = pos;
321    while i > 0 {
322        i -= 1;
323        let line = lines[i].trim();
324        if line.is_empty() {
325            continue;
326        }
327        return line_looks_like_table_syntax(line);
328    }
329    false
330}
331
332fn line_looks_like_table_syntax(line: &str) -> bool {
333    if line.starts_with('|') && line.matches('|').count() >= 2 {
334        return true;
335    }
336    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
337        return true;
338    }
339    try_parse_table_separator(line).is_some()
340        || try_parse_pipe_separator(line).is_some()
341        || try_parse_grid_separator(line).is_some()
342}
343
344/// Find caption after table (if any).
345/// Returns (caption_start, caption_end) positions, or None.
346fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
347    if table_end >= lines.len() {
348        return None;
349    }
350
351    let mut pos = table_end;
352
353    // Skip one blank line if present
354    if pos < lines.len() && lines[pos].trim().is_empty() {
355        pos += 1;
356    }
357
358    if pos >= lines.len() {
359        return None;
360    }
361
362    // Check if this line is a caption
363    if is_table_caption_start(lines[pos]) {
364        let caption_start = pos;
365        // Find end of caption (continues until blank line)
366        let mut caption_end = caption_start + 1;
367        while caption_end < lines.len() && !lines[caption_end].trim().is_empty() {
368            caption_end += 1;
369        }
370        Some((caption_start, caption_end))
371    } else {
372        None
373    }
374}
375
376/// Emit a table caption node.
377fn emit_table_caption(
378    builder: &mut GreenNodeBuilder<'static>,
379    lines: &[&str],
380    start: usize,
381    end: usize,
382    config: &Config,
383) {
384    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
385
386    for (i, line) in lines[start..end].iter().enumerate() {
387        if i == 0 {
388            // First line - parse and emit prefix separately
389            let trimmed = line.trim_start();
390            let leading_ws_len = line.len() - trimmed.len();
391
392            // Emit leading whitespace if present
393            if leading_ws_len > 0 {
394                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
395            }
396
397            // Check for caption prefix and emit separately
398            // Calculate where the prefix ends (after trimmed content)
399            let prefix_and_rest = if line.ends_with('\n') {
400                &line[leading_ws_len..line.len() - 1] // Exclude newline
401            } else {
402                &line[leading_ws_len..]
403            };
404
405            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
406                (7, "Table: ")
407            } else if prefix_and_rest.starts_with("table: ") {
408                (7, "table: ")
409            } else if prefix_and_rest.starts_with(": ") {
410                (2, ": ")
411            } else if prefix_and_rest.starts_with(':') {
412                (1, ":")
413            } else {
414                (0, "")
415            };
416
417            if prefix_len > 0 {
418                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
419
420                // Emit rest of line after prefix
421                let rest_start = leading_ws_len + prefix_len;
422                if rest_start < line.len() {
423                    // Get the caption text (excluding newline)
424                    let (caption_text, newline_str) = strip_newline(&line[rest_start..]);
425
426                    if !caption_text.is_empty() {
427                        inline_emission::emit_inlines(builder, caption_text, config);
428                    }
429
430                    if !newline_str.is_empty() {
431                        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
432                    }
433                }
434            } else {
435                // No recognized prefix, emit whole trimmed line
436                let (text, newline_str) = strip_newline(&line[leading_ws_len..]);
437
438                if !text.is_empty() {
439                    inline_emission::emit_inlines(builder, text, config);
440                }
441
442                if !newline_str.is_empty() {
443                    builder.token(SyntaxKind::NEWLINE.into(), newline_str);
444                }
445            }
446        } else {
447            // Continuation lines - emit with inline parsing
448            let (text, newline_str) = strip_newline(line);
449
450            if !text.is_empty() {
451                inline_emission::emit_inlines(builder, text, config);
452            }
453
454            if !newline_str.is_empty() {
455                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
456            }
457        }
458    }
459
460    builder.finish_node(); // TABLE_CAPTION
461}
462
463/// Emit a table cell with inline content parsing.
464/// This is the core helper for Phase 7.1 table inline parsing migration.
465fn emit_table_cell(builder: &mut GreenNodeBuilder<'static>, cell_text: &str, config: &Config) {
466    builder.start_node(SyntaxKind::TABLE_CELL.into());
467
468    // Parse inline content within the cell
469    if !cell_text.is_empty() {
470        inline_emission::emit_inlines(builder, cell_text, config);
471    }
472
473    builder.finish_node(); // TABLE_CELL
474}
475
476/// Determine column alignments based on separator and optional header.
477fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
478    for col in columns.iter_mut() {
479        let sep_slice = &separator_line[col.start..col.end];
480
481        if let Some(header) = header_line {
482            // Extract header text for this column
483            let header_text = if col.end <= header.len() {
484                header[col.start..col.end].trim()
485            } else if col.start < header.len() {
486                header[col.start..].trim()
487            } else {
488                ""
489            };
490
491            if header_text.is_empty() {
492                col.alignment = Alignment::Default;
493                continue;
494            }
495
496            // Find where the header text starts and ends within the column
497            let header_in_col = &header[col.start..col.end.min(header.len())];
498            let text_start = header_in_col.len() - header_in_col.trim_start().len();
499            let text_end = header_in_col.trim_end().len() + text_start;
500
501            // Check dash alignment relative to text
502            let dashes_start = 0; // Dashes start at beginning of sep_slice
503            let dashes_end = sep_slice.len();
504
505            let flush_left = dashes_start == text_start;
506            let flush_right = dashes_end == text_end;
507
508            col.alignment = match (flush_left, flush_right) {
509                (true, true) => Alignment::Default,
510                (true, false) => Alignment::Left,
511                (false, true) => Alignment::Right,
512                (false, false) => Alignment::Center,
513            };
514        } else {
515            // Without header, alignment based on first row (we'll handle this later)
516            col.alignment = Alignment::Default;
517        }
518    }
519}
520
521/// Try to parse a simple table starting at the given position.
522/// Returns the number of lines consumed if successful.
523pub(crate) fn try_parse_simple_table(
524    lines: &[&str],
525    start_pos: usize,
526    builder: &mut GreenNodeBuilder<'static>,
527    config: &Config,
528) -> Option<usize> {
529    log::debug!("try_parse_simple_table at line {}", start_pos + 1);
530
531    if start_pos >= lines.len() {
532        return None;
533    }
534
535    // Look for a separator line
536    let separator_pos = find_separator_line(lines, start_pos)?;
537    log::debug!("  found separator at line {}", separator_pos + 1);
538
539    let separator_line = lines[separator_pos];
540    let mut columns = try_parse_table_separator(separator_line)?;
541
542    // Determine if there's a header (separator not at start)
543    let has_header = separator_pos > start_pos;
544    let header_line = if has_header {
545        Some(lines[separator_pos - 1])
546    } else {
547        None
548    };
549
550    // Determine alignments
551    determine_alignments(&mut columns, separator_line, header_line);
552
553    // Find table end (blank line or end of input)
554    let end_pos = find_table_end(lines, separator_pos + 1);
555
556    // Must have at least one data row (or it's just a separator)
557    let data_rows = end_pos - separator_pos - 1;
558
559    if data_rows == 0 {
560        return None;
561    }
562
563    // Check for caption before table
564    let caption_before = find_caption_before_table(lines, start_pos);
565
566    // Check for caption after table
567    let caption_after = find_caption_after_table(lines, end_pos);
568
569    // Build the table
570    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
571
572    // Emit caption before if present
573    if let Some((cap_start, cap_end)) = caption_before {
574        emit_table_caption(builder, lines, cap_start, cap_end, config);
575    }
576
577    // Emit header if present
578    if has_header {
579        emit_table_row(
580            builder,
581            lines[separator_pos - 1],
582            &columns,
583            SyntaxKind::TABLE_HEADER,
584            config,
585        );
586    }
587
588    // Emit separator
589    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
590    emit_line_tokens(builder, separator_line);
591    builder.finish_node();
592
593    // Emit data rows
594    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
595        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
596    }
597
598    // Emit caption after if present
599    if let Some((cap_start, cap_end)) = caption_after {
600        // Emit blank line before caption if needed
601        if cap_start > end_pos {
602            builder.start_node(SyntaxKind::BLANK_LINE.into());
603            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
604            builder.finish_node();
605        }
606        emit_table_caption(builder, lines, cap_start, cap_end, config);
607    }
608
609    builder.finish_node(); // SimpleTable
610
611    // Calculate lines consumed (including captions)
612    let table_start = if let Some((cap_start, _)) = caption_before {
613        cap_start
614    } else if has_header {
615        separator_pos - 1
616    } else {
617        separator_pos
618    };
619
620    let table_end = if let Some((_, cap_end)) = caption_after {
621        cap_end
622    } else {
623        end_pos
624    };
625
626    let lines_consumed = table_end - table_start;
627
628    Some(lines_consumed)
629}
630
631/// Find the position of a separator line starting from pos.
632fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
633    log::debug!("  find_separator_line from line {}", start_pos + 1);
634
635    // Check first line
636    log::debug!("    checking first line: {:?}", lines[start_pos]);
637    if try_parse_table_separator(lines[start_pos]).is_some() {
638        log::debug!("    separator found at first line");
639        return Some(start_pos);
640    }
641
642    // Check second line (for table with header)
643    if start_pos + 1 < lines.len()
644        && !lines[start_pos].trim().is_empty()
645        && try_parse_table_separator(lines[start_pos + 1]).is_some()
646    {
647        return Some(start_pos + 1);
648    }
649
650    None
651}
652
653/// Find where the table ends (first blank line or end of input).
654fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
655    for i in start_pos..lines.len() {
656        if lines[i].trim().is_empty() {
657            return i;
658        }
659        // Check if this could be a closing separator
660        if try_parse_table_separator(lines[i]).is_some() {
661            // Check if next line is blank or end
662            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
663                return i + 1;
664            }
665        }
666    }
667    lines.len()
668}
669
670/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
671/// Uses column boundaries from the separator line to extract cells.
672fn emit_table_row(
673    builder: &mut GreenNodeBuilder<'static>,
674    line: &str,
675    columns: &[Column],
676    row_kind: SyntaxKind,
677    config: &Config,
678) {
679    builder.start_node(row_kind.into());
680
681    let (line_without_newline, newline_str) = strip_newline(line);
682
683    // Emit leading whitespace if present
684    let trimmed = line_without_newline.trim_start();
685    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
686    if leading_ws_len > 0 {
687        builder.token(
688            SyntaxKind::WHITESPACE.into(),
689            &line_without_newline[..leading_ws_len],
690        );
691    }
692
693    // Track where we are in the line (for losslessness)
694    let mut current_pos = 0;
695
696    // Extract and emit cells based on column boundaries
697    for col in columns.iter() {
698        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
699        let cell_start = if col.start >= leading_ws_len {
700            (col.start - leading_ws_len).min(trimmed.len())
701        } else {
702            0
703        };
704
705        let cell_end = if col.end >= leading_ws_len {
706            (col.end - leading_ws_len).min(trimmed.len())
707        } else {
708            0
709        };
710
711        // Extract cell text from column bounds
712        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
713            &trimmed[cell_start..cell_end]
714        } else if cell_start < trimmed.len() {
715            &trimmed[cell_start..]
716        } else {
717            ""
718        };
719
720        let cell_content = cell_text.trim();
721        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
722
723        // Emit any whitespace from current position to start of cell content
724        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
725        if current_pos < content_abs_pos {
726            builder.token(
727                SyntaxKind::WHITESPACE.into(),
728                &trimmed[current_pos..content_abs_pos],
729            );
730        }
731
732        // Emit cell with inline parsing
733        emit_table_cell(builder, cell_content, config);
734
735        // Update current position to end of cell content
736        current_pos = content_abs_pos + cell_content.len();
737    }
738
739    // Emit any remaining whitespace after last cell
740    if current_pos < trimmed.len() {
741        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
742    }
743
744    // Emit newline if present
745    if !newline_str.is_empty() {
746        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
747    }
748
749    builder.finish_node();
750}
751
752// ============================================================================
753// Pipe Table Parsing
754// ============================================================================
755
756/// Check if a line is a pipe table separator line.
757/// Returns the column alignments if it's a valid separator.
758fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
759    let trimmed = line.trim();
760
761    // Must contain at least one pipe
762    if !trimmed.contains('|') && !trimmed.contains('+') {
763        return None;
764    }
765
766    // Split by pipes (or + for orgtbl variant)
767    let cells: Vec<&str> = if trimmed.contains('+') {
768        // Orgtbl variant: use + as separator in separator line
769        trimmed.split(['|', '+']).collect()
770    } else {
771        trimmed.split('|').collect()
772    };
773
774    let mut alignments = Vec::new();
775
776    for cell in cells {
777        let cell = cell.trim();
778
779        // Skip empty cells (from leading/trailing pipes)
780        if cell.is_empty() {
781            continue;
782        }
783
784        // Must be dashes with optional colons
785        let starts_colon = cell.starts_with(':');
786        let ends_colon = cell.ends_with(':');
787
788        // Remove colons to check if rest is all dashes
789        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
790
791        // Must have at least one dash
792        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
793            return None;
794        }
795
796        // Determine alignment from colon positions
797        let alignment = match (starts_colon, ends_colon) {
798            (true, true) => Alignment::Center,
799            (true, false) => Alignment::Left,
800            (false, true) => Alignment::Right,
801            (false, false) => Alignment::Default,
802        };
803
804        alignments.push(alignment);
805    }
806
807    // Must have at least one column
808    if alignments.is_empty() {
809        None
810    } else {
811        Some(alignments)
812    }
813}
814
/// Split a pipe-table row into its trimmed cell texts.
///
/// Surrounding whitespace and one leading `|` are dropped. A backslash
/// escapes the character that follows it, so `\|` stays inside the cell and
/// `\\` is an escaped backslash that does not protect a following pipe. This
/// is the same escape rule `emit_pipe_table_row` uses to find cell boundaries.
/// Escape sequences are kept verbatim in the returned text.
///
/// Empty cells between two pipes are kept; an empty final cell (the line ends
/// with a pipe) is dropped.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // A leading pipe only opens the first cell; it does not delimit one.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and whatever it escapes together.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // The text after the last pipe is a cell only if it is non-empty.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
862
863/// Emit a pipe table row with inline-parsed cells.
864/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
865fn emit_pipe_table_row(
866    builder: &mut GreenNodeBuilder<'static>,
867    line: &str,
868    row_kind: SyntaxKind,
869    config: &Config,
870) {
871    builder.start_node(row_kind.into());
872
873    let (line_without_newline, newline_str) = strip_newline(line);
874    let trimmed = line_without_newline.trim();
875
876    // Parse cell boundaries
877    let mut cell_starts = Vec::new();
878    let mut cell_ends = Vec::new();
879    let mut in_escape = false;
880
881    // Find all pipe positions (excluding escaped ones)
882    let mut pipe_positions = Vec::new();
883    for (i, ch) in trimmed.char_indices() {
884        if in_escape {
885            in_escape = false;
886            continue;
887        }
888        if ch == '\\' {
889            in_escape = true;
890            continue;
891        }
892        if ch == '|' {
893            pipe_positions.push(i);
894        }
895    }
896
897    // Determine cell boundaries based on pipe positions
898    if pipe_positions.is_empty() {
899        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
900        cell_starts.push(0);
901        cell_ends.push(trimmed.len());
902    } else {
903        // Check if line starts with pipe
904        let start_pipe = pipe_positions.first() == Some(&0);
905        // Check if line ends with pipe
906        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
907
908        if start_pipe {
909            // Skip first pipe
910            for i in 1..pipe_positions.len() {
911                cell_starts.push(pipe_positions[i - 1] + 1);
912                cell_ends.push(pipe_positions[i]);
913            }
914            // Add last cell if there's no trailing pipe
915            if !end_pipe {
916                cell_starts.push(*pipe_positions.last().unwrap() + 1);
917                cell_ends.push(trimmed.len());
918            }
919        } else {
920            // No leading pipe
921            cell_starts.push(0);
922            cell_ends.push(pipe_positions[0]);
923
924            for i in 1..pipe_positions.len() {
925                cell_starts.push(pipe_positions[i - 1] + 1);
926                cell_ends.push(pipe_positions[i]);
927            }
928
929            // Add last cell if there's no trailing pipe
930            if !end_pipe {
931                cell_starts.push(*pipe_positions.last().unwrap() + 1);
932                cell_ends.push(trimmed.len());
933            }
934        }
935    }
936
937    // Emit leading whitespace if present (before trim)
938    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
939    if leading_ws_len > 0 {
940        builder.token(
941            SyntaxKind::WHITESPACE.into(),
942            &line_without_newline[..leading_ws_len],
943        );
944    }
945
946    // Emit cells with pipes
947    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
948        // Emit pipe before cell (except for first cell if no leading pipe)
949        if *start > 0 {
950            builder.token(SyntaxKind::TEXT.into(), "|");
951        } else if idx == 0 && trimmed.starts_with('|') {
952            // Leading pipe
953            builder.token(SyntaxKind::TEXT.into(), "|");
954        }
955
956        // Get cell content with its whitespace
957        let cell_with_ws = &trimmed[*start..*end];
958        let cell_content = cell_with_ws.trim();
959
960        // Emit leading whitespace within cell
961        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
962        if !cell_leading_ws.is_empty() {
963            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
964        }
965
966        // Emit cell with inline parsing
967        emit_table_cell(builder, cell_content, config);
968
969        // Emit trailing whitespace within cell
970        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
971        if cell_trailing_ws_start < cell_with_ws.len() {
972            builder.token(
973                SyntaxKind::WHITESPACE.into(),
974                &cell_with_ws[cell_trailing_ws_start..],
975            );
976        }
977    }
978
979    // Emit trailing pipe if present
980    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
981        builder.token(SyntaxKind::TEXT.into(), "|");
982    }
983
984    // Emit trailing whitespace after trim (before newline)
985    let trailing_ws_start = leading_ws_len + trimmed.len();
986    if trailing_ws_start < line_without_newline.len() {
987        builder.token(
988            SyntaxKind::WHITESPACE.into(),
989            &line_without_newline[trailing_ws_start..],
990        );
991    }
992
993    // Emit newline
994    if !newline_str.is_empty() {
995        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
996    }
997
998    builder.finish_node();
999}
1000
1001/// Try to parse a pipe table starting at the given position.
1002/// Returns the number of lines consumed if successful.
1003pub(crate) fn try_parse_pipe_table(
1004    lines: &[&str],
1005    start_pos: usize,
1006    builder: &mut GreenNodeBuilder<'static>,
1007    config: &Config,
1008) -> Option<usize> {
1009    if start_pos + 1 >= lines.len() {
1010        return None;
1011    }
1012
1013    // Check if this line is a caption followed by a table
1014    // If so, the actual table starts after the caption and blank line
1015    let (actual_start, has_caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1016        // Skip caption line
1017        let mut pos = start_pos + 1;
1018        // Skip blank line if present
1019        while pos < lines.len() && lines[pos].trim().is_empty() {
1020            pos += 1;
1021        }
1022        (pos, true)
1023    } else {
1024        (start_pos, false)
1025    };
1026
1027    if actual_start + 1 >= lines.len() {
1028        return None;
1029    }
1030
1031    // First line should have pipes (potential header)
1032    let header_line = lines[actual_start];
1033    if !header_line.contains('|') {
1034        return None;
1035    }
1036
1037    // Second line should be separator
1038    let separator_line = lines[actual_start + 1];
1039    let alignments = try_parse_pipe_separator(separator_line)?;
1040
1041    // Parse header cells
1042    let header_cells = parse_pipe_table_row(header_line);
1043
1044    // Number of columns should match (approximately - be lenient)
1045    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1046        // Only fail if very different
1047        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1048            return None;
1049        }
1050    }
1051
1052    // Find table end (first blank line or end of input)
1053    let mut end_pos = actual_start + 2;
1054    while end_pos < lines.len() {
1055        let line = lines[end_pos];
1056        if line.trim().is_empty() {
1057            break;
1058        }
1059        // Row should have pipes
1060        if !line.contains('|') {
1061            break;
1062        }
1063        end_pos += 1;
1064    }
1065
1066    // Must have at least one data row
1067    if end_pos <= actual_start + 2 {
1068        return None;
1069    }
1070
1071    // Check for caption before table (only if we didn't already detect it)
1072    let caption_before = if has_caption_before {
1073        Some((start_pos, start_pos + 1)) // Single-line caption detected earlier
1074    } else {
1075        find_caption_before_table(lines, actual_start)
1076    };
1077
1078    // Check for caption after table
1079    let caption_after = find_caption_after_table(lines, end_pos);
1080
1081    // Build the pipe table
1082    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1083
1084    // Emit caption before if present
1085    if let Some((cap_start, cap_end)) = caption_before {
1086        emit_table_caption(builder, lines, cap_start, cap_end, config);
1087        // Emit blank line between caption and table if present
1088        if cap_end < actual_start {
1089            for line in lines.iter().take(actual_start).skip(cap_end) {
1090                if line.trim().is_empty() {
1091                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1092                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1093                    builder.finish_node();
1094                }
1095            }
1096        }
1097    }
1098
1099    // Emit header row with inline-parsed cells
1100    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1101
1102    // Emit separator
1103    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1104    emit_line_tokens(builder, separator_line);
1105    builder.finish_node();
1106
1107    // Emit data rows with inline-parsed cells
1108    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1109        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1110    }
1111
1112    // Emit caption after if present
1113    if let Some((cap_start, cap_end)) = caption_after {
1114        // Emit blank line before caption if needed
1115        if cap_start > end_pos {
1116            builder.start_node(SyntaxKind::BLANK_LINE.into());
1117            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
1118            builder.finish_node();
1119        }
1120        emit_table_caption(builder, lines, cap_start, cap_end, config);
1121    }
1122
1123    builder.finish_node(); // PipeTable
1124
1125    // Calculate lines consumed
1126    let table_start = caption_before
1127        .map(|(start, _)| start)
1128        .unwrap_or(actual_start);
1129    let table_end = if let Some((_, cap_end)) = caption_after {
1130        cap_end
1131    } else {
1132        end_pos
1133    };
1134
1135    Some(table_end - table_start)
1136}
1137
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the simple-table parser on `input` from line `start` with a scratch
    /// builder and the default config; yields the reported consumed-line count.
    fn simple_table_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_simple_table(input, start, &mut builder, &Config::default())
    }

    /// Same as [`simple_table_lines`], but for the pipe-table parser.
    fn pipe_table_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_pipe_table(input, start, &mut builder, &Config::default())
    }

    #[test]
    fn test_separator_detection() {
        let accepted = [
            "------- ------ ----------   -------",
            "  ---  ---  ---",
            "--- --- ---", // table separator
        ];
        for line in accepted {
            assert!(try_parse_table_separator(line).is_some());
        }
        // A single dash run is a horizontal rule, not a table separator.
        assert!(try_parse_table_separator("-------").is_none());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(simple_table_lines(&input, 0), Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // sep + 2 rows
        assert_eq!(simple_table_lines(&input, 0), Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(try_parse_caption_prefix(caption).is_some());
        }
        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
        assert!(try_parse_caption_prefix("Not a caption").is_none());
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let input = "Term\n: ```\n  code\n  ```\n";
        let tree = crate::parse(input, None);
        let contains = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            contains(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            contains(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !contains(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(simple_table_lines(&input, 0), Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let input = [
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(simple_table_lines(&input, 2), Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(simple_table_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];

        // Should consume through end of multi-line caption
        assert_eq!(simple_table_lines(&input, 0), Some(6));
    }

    // Pipe table tests
    #[test]
    fn test_pipe_separator_detection() {
        let accepted = [
            "|------:|:-----|---------|:------:|",
            "|---|---|",
            "-----|-----:",     // No leading pipe
            "|-----+-------|", // Orgtbl variant
        ];
        for line in accepted {
            assert!(try_parse_pipe_separator(line).is_some());
        }
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(
            aligns,
            [
                Alignment::Right,
                Alignment::Left,
                Alignment::Default,
                Alignment::Center,
            ]
        );
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let cells = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(cells, ["Right", "Left", "Center"]);

        // Without leading/trailing pipes
        let bare = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(bare.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let input = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(pipe_table_lines(&input, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let input = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        assert_eq!(pipe_table_lines(&input, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let input = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(pipe_table_lines(&input, 1), Some(5));
    }
}
1388
1389// ============================================================================
1390// Grid Table Parsing
1391// ============================================================================
1392
/// Parse a grid table border line such as `+-----+:---:+` or `+=====+=====+`.
///
/// A border line is indented by at most three characters, starts with `+` and
/// (ignoring trailing whitespace) ends with `+`. Every non-empty segment between
/// two `+` signs must be a run of only `-` or only `=`, optionally wrapped in
/// alignment colons; empty segments are ignored.
///
/// Returns one [`GridColumn`] per non-empty segment, or `None` when the line is not
/// a border line or contains no such segment.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    // More than three characters of indentation would make this a code block.
    let unindented = line.trim_start();
    if line.len() - unindented.len() > 3 {
        return None;
    }

    let border = unindented.trim_end();
    if !(border.starts_with('+') && border.ends_with('+')) {
        return None;
    }

    // Splitting yields an empty piece before the first `+` and after the last one,
    // so at least three pieces are required for a single column.
    let pieces: Vec<&str> = border.split('+').collect();
    if pieces.len() < 3 {
        return None;
    }

    let columns = pieces[1..pieces.len() - 1]
        .iter()
        .filter(|piece| !piece.is_empty())
        .map(|piece| {
            // What remains after removing the alignment colons is the fill run.
            let fill = piece.trim_matches(':');
            let marker = fill.chars().next().filter(|&c| c == '-' || c == '=')?;
            if fill.chars().any(|c| c != marker) {
                return None;
            }
            Some(GridColumn {
                is_header_separator: marker == '=',
                width: piece.chars().count(),
            })
        })
        .collect::<Option<Vec<_>>>()?;

    if columns.is_empty() {
        None
    } else {
        Some(columns)
    }
}

/// One column of a grid table, as described by a border line segment.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the segment, alignment colons included.
    width: usize,
}
1467
1468fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1469    let mut end_byte = start_byte;
1470    let mut display_cols = 0usize;
1471
1472    for (offset, ch) in line[start_byte..].char_indices() {
1473        if ch == '|' {
1474            let sep_byte = start_byte + offset;
1475            return (sep_byte, sep_byte + 1);
1476        }
1477        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1478        if display_cols + ch_width > width {
1479            break;
1480        }
1481        display_cols += ch_width;
1482        end_byte = start_byte + offset + ch.len_utf8();
1483        if display_cols >= width {
1484            break;
1485        }
1486    }
1487
1488    // If the width budget is exhausted before seeing a separator (for example
1489    // because of padding/layout drift), advance to the next literal separator
1490    // to keep row slicing aligned and preserve losslessness.
1491    let mut sep_byte = end_byte;
1492    while sep_byte < line.len() {
1493        let mut chars = line[sep_byte..].chars();
1494        let Some(ch) = chars.next() else {
1495            break;
1496        };
1497        if ch == '|' {
1498            return (sep_byte, sep_byte + 1);
1499        }
1500        sep_byte += ch.len_utf8();
1501    }
1502
1503    (end_byte, end_byte)
1504}
1505
/// Report whether `line` looks like a grid table content row.
///
/// Such a row is indented by at most three characters, begins with `|` and, once
/// trailing whitespace is ignored, ends with `|` (normal row) or `+` (spanning-style
/// continuation line).
fn is_grid_content_row(line: &str) -> bool {
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();
    if indent > 3 {
        return false;
    }

    let row = unindented.trim_end();
    row.starts_with('|') && matches!(row.chars().next_back(), Some('|' | '+'))
}
1519
1520/// Extract cell contents from a single grid table row line.
1521/// Returns a vector of cell contents (trimmed) based on column boundaries.
1522/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1523fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1524    let (line_content, _) = strip_newline(line);
1525    let line_trimmed = line_content.trim();
1526
1527    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1528        return vec![String::new(); _columns.len()];
1529    }
1530
1531    let mut cells = Vec::with_capacity(_columns.len());
1532    let mut pos_byte = 1; // Skip leading pipe
1533
1534    for col in _columns {
1535        let col_idx = cells.len();
1536        if pos_byte >= line_trimmed.len() {
1537            cells.push(String::new());
1538            continue;
1539        }
1540
1541        let start_byte = pos_byte;
1542        let end_byte = if col_idx + 1 == _columns.len() {
1543            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1544        } else {
1545            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1546            pos_byte = next_start;
1547            end
1548        };
1549        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1550        if col_idx + 1 == _columns.len() {
1551            pos_byte = line_trimmed.len();
1552        }
1553    }
1554
1555    cells
1556}
1557
1558/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1559/// Concatenates cell contents across lines with newlines, then trims.
1560fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1561    if lines.is_empty() {
1562        return vec![String::new(); columns.len()];
1563    }
1564
1565    extract_grid_cells_from_line(lines[0], columns)
1566}
1567
1568/// Emit a grid table row with inline-parsed cells.
1569/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1570/// then continuation lines as raw TEXT for losslessness.
1571fn emit_grid_table_row(
1572    builder: &mut GreenNodeBuilder<'static>,
1573    lines: &[&str],
1574    columns: &[GridColumn],
1575    row_kind: SyntaxKind,
1576    config: &Config,
1577) {
1578    if lines.is_empty() {
1579        return;
1580    }
1581
1582    // Extract cell contents from the first line.
1583    let cell_contents = extract_grid_cells_multiline(lines, columns);
1584
1585    builder.start_node(row_kind.into());
1586
1587    // Emit first line with TABLE_CELL nodes
1588    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1589    let first_line = lines[0];
1590    let (line_without_newline, newline_str) = strip_newline(first_line);
1591    let trimmed = line_without_newline.trim();
1592    let expected_pipe_count = columns.len().saturating_add(1);
1593    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1594
1595    // Rows that don't contain all expected column separators (spanning-style rows)
1596    // must be emitted verbatim for losslessness.
1597    if actual_pipe_count != expected_pipe_count {
1598        emit_line_tokens(builder, first_line);
1599        for line in lines.iter().skip(1) {
1600            emit_line_tokens(builder, line);
1601        }
1602        builder.finish_node();
1603        return;
1604    }
1605
1606    // Emit leading whitespace
1607    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1608    if leading_ws_len > 0 {
1609        builder.token(
1610            SyntaxKind::WHITESPACE.into(),
1611            &line_without_newline[..leading_ws_len],
1612        );
1613    }
1614
1615    // Emit leading pipe
1616    if trimmed.starts_with('|') {
1617        builder.token(SyntaxKind::TEXT.into(), "|");
1618    }
1619
1620    // Emit each cell based on fixed column widths from separators
1621    let mut pos_byte = 1usize; // after leading pipe
1622    for (idx, cell_content) in cell_contents.iter().enumerate() {
1623        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1624            let start_byte = pos_byte;
1625            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1626                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1627            } else {
1628                let (end, next_start) =
1629                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1630                pos_byte = next_start;
1631                end
1632            };
1633            let slice = &trimmed[start_byte..end_byte];
1634            if idx + 1 == columns.len() {
1635                pos_byte = trimmed.len();
1636            }
1637            slice
1638        } else {
1639            ""
1640        };
1641
1642        // Emit leading whitespace in cell
1643        let cell_trimmed = part.trim();
1644        let ws_start_len = part.len() - part.trim_start().len();
1645        if ws_start_len > 0 {
1646            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1647        }
1648
1649        // Emit TABLE_CELL with inline parsing
1650        emit_table_cell(builder, cell_content, config);
1651
1652        // Emit trailing whitespace in cell
1653        let ws_end_start = ws_start_len + cell_trimmed.len();
1654        if ws_end_start < part.len() {
1655            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1656        }
1657
1658        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1659        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1660            builder.token(SyntaxKind::TEXT.into(), "|");
1661        }
1662    }
1663
1664    // Emit trailing whitespace before newline
1665    let trailing_ws_start = leading_ws_len + trimmed.len();
1666    if trailing_ws_start < line_without_newline.len() {
1667        builder.token(
1668            SyntaxKind::WHITESPACE.into(),
1669            &line_without_newline[trailing_ws_start..],
1670        );
1671    }
1672
1673    // Emit newline
1674    if !newline_str.is_empty() {
1675        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1676    }
1677
1678    // Emit continuation lines as TEXT for losslessness
1679    for line in lines.iter().skip(1) {
1680        emit_line_tokens(builder, line);
1681    }
1682
1683    builder.finish_node();
1684}
1685
1686/// Try to parse a grid table starting at the given position.
1687/// Returns the number of lines consumed if successful.
1688pub(crate) fn try_parse_grid_table(
1689    lines: &[&str],
1690    start_pos: usize,
1691    builder: &mut GreenNodeBuilder<'static>,
1692    config: &Config,
1693) -> Option<usize> {
1694    if start_pos >= lines.len() {
1695        return None;
1696    }
1697
1698    // Check if this line is a caption followed by a table
1699    // If so, the actual table starts after the caption and blank line
1700    let (actual_start, has_caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1701        // Skip caption line
1702        let mut pos = start_pos + 1;
1703        // Skip blank line if present
1704        while pos < lines.len() && lines[pos].trim().is_empty() {
1705            pos += 1;
1706        }
1707        (pos, true)
1708    } else {
1709        (start_pos, false)
1710    };
1711
1712    if actual_start >= lines.len() {
1713        return None;
1714    }
1715
1716    // First line must be a grid separator
1717    let first_line = lines[actual_start];
1718    let _columns = try_parse_grid_separator(first_line)?;
1719
1720    // Track table structure
1721    let mut end_pos = actual_start + 1;
1722    let mut found_header_sep = false;
1723    let mut in_footer = false;
1724
1725    // Scan table lines
1726    while end_pos < lines.len() {
1727        let line = lines[end_pos];
1728
1729        // Check for blank line (table ends)
1730        if line.trim().is_empty() {
1731            break;
1732        }
1733
1734        // Check for separator line
1735        if let Some(sep_cols) = try_parse_grid_separator(line) {
1736            // Check if this is a header separator (=)
1737            if sep_cols.iter().any(|c| c.is_header_separator) {
1738                if !found_header_sep {
1739                    found_header_sep = true;
1740                } else if !in_footer {
1741                    // Second = separator starts footer
1742                    in_footer = true;
1743                }
1744            }
1745            end_pos += 1;
1746            continue;
1747        }
1748
1749        // Check for content row
1750        if is_grid_content_row(line) {
1751            end_pos += 1;
1752            continue;
1753        }
1754
1755        // Not a valid grid table line - table ends
1756        break;
1757    }
1758
1759    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1760    // Or just top + content rows that end with a separator
1761    if end_pos <= actual_start + 1 {
1762        return None;
1763    }
1764
1765    // Last consumed line should be a separator for a well-formed table
1766    // But we'll be lenient and accept tables ending with content rows
1767
1768    // Check for caption before table (only if we didn't already detected it)
1769    let caption_before = if has_caption_before {
1770        Some((start_pos, start_pos + 1)) // Single-line caption detected earlier
1771    } else {
1772        find_caption_before_table(lines, actual_start)
1773    };
1774
1775    // Check for caption after table
1776    let caption_after = find_caption_after_table(lines, end_pos);
1777
1778    // Build the grid table
1779    builder.start_node(SyntaxKind::GRID_TABLE.into());
1780
1781    // Emit caption before if present
1782    if let Some((cap_start, cap_end)) = caption_before {
1783        emit_table_caption(builder, lines, cap_start, cap_end, config);
1784        // Emit blank line between caption and table if present
1785        if cap_end < actual_start {
1786            for line in lines.iter().take(actual_start).skip(cap_end) {
1787                if line.trim().is_empty() {
1788                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1789                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1790                    builder.finish_node();
1791                }
1792            }
1793        }
1794    }
1795
1796    // Track whether we've passed the header separator
1797    let mut past_header_sep = false;
1798    let mut in_footer_section = false;
1799    let mut current_row_lines: Vec<&str> = Vec::new();
1800    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1801
1802    // Emit table rows - accumulate multi-line cells
1803    for line in lines.iter().take(end_pos).skip(actual_start) {
1804        if let Some(sep_cols) = try_parse_grid_separator(line) {
1805            // Separator line - emit any accumulated row first
1806            if !current_row_lines.is_empty() {
1807                emit_grid_table_row(
1808                    builder,
1809                    &current_row_lines,
1810                    &sep_cols,
1811                    current_row_kind,
1812                    config,
1813                );
1814                current_row_lines.clear();
1815            }
1816
1817            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1818
1819            if is_header_sep {
1820                if !past_header_sep {
1821                    // This is the header/body separator
1822                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1823                    emit_line_tokens(builder, line);
1824                    builder.finish_node();
1825                    past_header_sep = true;
1826                } else {
1827                    // Footer separator
1828                    if !in_footer_section {
1829                        in_footer_section = true;
1830                    }
1831                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1832                    emit_line_tokens(builder, line);
1833                    builder.finish_node();
1834                }
1835            } else {
1836                // Regular separator (row boundary)
1837                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1838                emit_line_tokens(builder, line);
1839                builder.finish_node();
1840            }
1841        } else if is_grid_content_row(line) {
1842            // Content row - accumulate for multi-line cells
1843            current_row_kind = if !past_header_sep && found_header_sep {
1844                SyntaxKind::TABLE_HEADER
1845            } else if in_footer_section {
1846                SyntaxKind::TABLE_FOOTER
1847            } else {
1848                SyntaxKind::TABLE_ROW
1849            };
1850
1851            current_row_lines.push(line);
1852        }
1853    }
1854
1855    // Emit any remaining accumulated row
1856    if !current_row_lines.is_empty() {
1857        // Use first separator's columns for cell boundaries
1858        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
1859            emit_grid_table_row(
1860                builder,
1861                &current_row_lines,
1862                &sep_cols,
1863                current_row_kind,
1864                config,
1865            );
1866        }
1867    }
1868
1869    // Emit caption after if present
1870    if let Some((cap_start, cap_end)) = caption_after {
1871        if cap_start > end_pos {
1872            builder.start_node(SyntaxKind::BLANK_LINE.into());
1873            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
1874            builder.finish_node();
1875        }
1876        emit_table_caption(builder, lines, cap_start, cap_end, config);
1877    }
1878
1879    builder.finish_node(); // GRID_TABLE
1880
1881    // Calculate lines consumed
1882    let table_start = caption_before
1883        .map(|(start, _)| start)
1884        .unwrap_or(actual_start);
1885    let table_end = if let Some((_, cap_end)) = caption_after {
1886        cap_end
1887    } else {
1888        end_pos
1889    };
1890
1891    Some(table_end - table_start)
1892}
1893
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Run the grid table parser on `input`, starting at line `start`, and
    /// return the number of lines it reports as consumed (if any).
    fn consumed_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &Config::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        // Lines that must be recognised as grid separators
        // (the last one carries centre-alignment colons).
        for sep in [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+",
        ] {
            assert!(try_parse_grid_separator(sep).is_some());
        }

        // Plain text and pipe-table separators must be rejected.
        for other in ["not a separator", "|---|---|"] {
            assert!(try_parse_grid_separator(other).is_none());
        }
    }

    #[test]
    fn test_grid_header_separator() {
        // `=` separators flag every column as a header separator ...
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(!header_cols.iter().any(|col| !col.is_header_separator));

        // ... while `-` separators flag none of them.
        let row_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(!row_cols.iter().any(|col| col.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for row in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(row));
        }

        // A separator line and a line without pipes are not content rows.
        for other in ["+---+---+", "no pipes here"] {
            assert!(!is_grid_content_row(other));
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts at the table itself; the count should still cover
        // caption + blank + table.
        assert_eq!(consumed_lines(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(&input, 0), Some(7));
    }
}
2048
2049// ============================================================================
2050// Multiline Table Parsing
2051// ============================================================================
2052
2053/// Check if a line is a multiline table separator (continuous dashes).
2054/// Multiline table separators span the full width and are all dashes.
2055/// Returns Some(columns) if valid, None otherwise.
2056fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2057    let trimmed = line.trim_start();
2058    let leading_spaces = line.len() - trimmed.len();
2059
2060    // Must have leading spaces <= 3 to not be a code block
2061    if leading_spaces > 3 {
2062        return None;
2063    }
2064
2065    let trimmed = trimmed.trim_end();
2066
2067    // Must be all dashes (continuous line of dashes)
2068    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2069        return None;
2070    }
2071
2072    // Must have at least 3 dashes
2073    if trimmed.len() < 3 {
2074        return None;
2075    }
2076
2077    // This is a full-width separator - columns will be determined by column separator lines
2078    Some(vec![Column {
2079        start: leading_spaces,
2080        end: leading_spaces + trimmed.len(),
2081        alignment: Alignment::Default,
2082    }])
2083}
2084
2085/// Check if a line is a column separator line for multiline tables.
2086/// Column separators have dashes with spaces between them to define columns.
2087fn is_column_separator(line: &str) -> bool {
2088    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2089}
2090
2091fn is_headerless_single_row_without_blank(
2092    lines: &[&str],
2093    row_start: usize,
2094    row_end: usize,
2095    columns: &[Column],
2096) -> bool {
2097    if row_start >= row_end {
2098        return false;
2099    }
2100
2101    if row_end - row_start == 1 {
2102        return false;
2103    }
2104
2105    let Some(last_col) = columns.last() else {
2106        return false;
2107    };
2108
2109    for line in lines.iter().take(row_end).skip(row_start + 1) {
2110        let (content, _) = strip_newline(line);
2111        let prefix_end = last_col.start.min(content.len());
2112        if !content[..prefix_end].trim().is_empty() {
2113            return false;
2114        }
2115    }
2116
2117    true
2118}
2119
2120/// Try to parse a multiline table starting at the given position.
2121/// Returns the number of lines consumed if successful.
2122pub(crate) fn try_parse_multiline_table(
2123    lines: &[&str],
2124    start_pos: usize,
2125    builder: &mut GreenNodeBuilder<'static>,
2126    config: &Config,
2127) -> Option<usize> {
2128    if start_pos >= lines.len() {
2129        return None;
2130    }
2131
2132    let first_line = lines[start_pos];
2133
2134    // First line can be either:
2135    // 1. A full-width dash separator (for tables with headers)
2136    // 2. A column separator (for headerless tables)
2137    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2138    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2139    let headerless_columns = if is_column_sep_start {
2140        try_parse_table_separator(first_line)
2141    } else {
2142        None
2143    };
2144
2145    if !is_full_width_start && !is_column_sep_start {
2146        return None;
2147    }
2148
2149    // Look ahead to find the structure
2150    let mut pos = start_pos + 1;
2151    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2152    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2153    let mut has_header = false;
2154    let mut found_blank_line = false;
2155    let mut found_closing_sep = false;
2156    let mut content_line_count = 0usize;
2157
2158    // Scan for header section and column separator
2159    while pos < lines.len() {
2160        let line = lines[pos];
2161
2162        // Check for column separator (defines columns) - only if we started with full-width
2163        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2164            found_column_sep = true;
2165            column_sep_pos = pos;
2166            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2167            pos += 1;
2168            continue;
2169        }
2170
2171        // Check for blank line (row separator in body)
2172        if line.trim().is_empty() {
2173            found_blank_line = true;
2174            pos += 1;
2175            // Check if next line is a valid closing separator for this table shape.
2176            if pos < lines.len() {
2177                let next = lines[pos];
2178                let is_valid_closer = if is_full_width_start {
2179                    try_parse_multiline_separator(next).is_some()
2180                } else {
2181                    is_column_separator(next)
2182                };
2183                if is_valid_closer {
2184                    found_closing_sep = true;
2185                    pos += 1; // Include the closing separator
2186                    break;
2187                }
2188            }
2189            continue;
2190        }
2191
2192        // Check for closing full-width dashes (only for full-width-start tables).
2193        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2194            found_closing_sep = true;
2195            pos += 1;
2196            break;
2197        }
2198
2199        // Check for closing column separator (for headerless tables)
2200        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2201            found_closing_sep = true;
2202            pos += 1;
2203            break;
2204        }
2205
2206        // Content row
2207        content_line_count += 1;
2208        pos += 1;
2209    }
2210
2211    // Must have found a column separator to be a valid multiline table
2212    if !found_column_sep {
2213        return None;
2214    }
2215
2216    // Must have had at least one blank line between rows (distinguishes from simple tables)
2217    if !found_blank_line {
2218        if !is_column_sep_start {
2219            return None;
2220        }
2221        let columns = headerless_columns.as_deref()?;
2222        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2223            return None;
2224        }
2225    }
2226
2227    // Must have a closing separator
2228    if !found_closing_sep {
2229        return None;
2230    }
2231
2232    // Must have consumed more than just the opening separator
2233    if pos <= start_pos + 2 {
2234        return None;
2235    }
2236
2237    let end_pos = pos;
2238
2239    // Extract column boundaries from the separator line
2240    let columns =
2241        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2242
2243    // Check for caption before table
2244    let caption_before = find_caption_before_table(lines, start_pos);
2245
2246    // Check for caption after table
2247    let caption_after = find_caption_after_table(lines, end_pos);
2248
2249    // Build the multiline table
2250    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2251
2252    // Emit caption before if present
2253    if let Some((cap_start, cap_end)) = caption_before {
2254        emit_table_caption(builder, lines, cap_start, cap_end, config);
2255
2256        // Emit blank line between caption and table if present
2257        if cap_end < start_pos {
2258            for line in lines.iter().take(start_pos).skip(cap_end) {
2259                if line.trim().is_empty() {
2260                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2261                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2262                    builder.finish_node();
2263                }
2264            }
2265        }
2266    }
2267
2268    // Emit opening separator
2269    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2270    emit_line_tokens(builder, lines[start_pos]);
2271    builder.finish_node();
2272
2273    // Track state for emitting
2274    let mut in_header = has_header;
2275    let mut current_row_lines: Vec<&str> = Vec::new();
2276
2277    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2278        // Column separator (header/body divider)
2279        if i == column_sep_pos {
2280            // Emit any accumulated header lines
2281            if !current_row_lines.is_empty() {
2282                emit_multiline_table_row(
2283                    builder,
2284                    &current_row_lines,
2285                    &columns,
2286                    SyntaxKind::TABLE_HEADER,
2287                    config,
2288                );
2289                current_row_lines.clear();
2290            }
2291
2292            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2293            emit_line_tokens(builder, line);
2294            builder.finish_node();
2295            in_header = false;
2296            continue;
2297        }
2298
2299        // Closing separator (full-width or column separator at end)
2300        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2301            // Emit any accumulated row lines
2302            if !current_row_lines.is_empty() {
2303                let kind = if in_header {
2304                    SyntaxKind::TABLE_HEADER
2305                } else {
2306                    SyntaxKind::TABLE_ROW
2307                };
2308                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2309                current_row_lines.clear();
2310            }
2311
2312            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2313            emit_line_tokens(builder, line);
2314            builder.finish_node();
2315            continue;
2316        }
2317
2318        // Blank line (row separator)
2319        if line.trim().is_empty() {
2320            // Emit accumulated row
2321            if !current_row_lines.is_empty() {
2322                let kind = if in_header {
2323                    SyntaxKind::TABLE_HEADER
2324                } else {
2325                    SyntaxKind::TABLE_ROW
2326                };
2327                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2328                current_row_lines.clear();
2329            }
2330
2331            builder.start_node(SyntaxKind::BLANK_LINE.into());
2332            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
2333            builder.finish_node();
2334            continue;
2335        }
2336
2337        // Content line - accumulate for current row
2338        current_row_lines.push(line);
2339    }
2340
2341    // Emit any remaining accumulated lines
2342    if !current_row_lines.is_empty() {
2343        let kind = if in_header {
2344            SyntaxKind::TABLE_HEADER
2345        } else {
2346            SyntaxKind::TABLE_ROW
2347        };
2348        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2349    }
2350
2351    // Emit caption after if present
2352    if let Some((cap_start, cap_end)) = caption_after {
2353        if cap_start > end_pos {
2354            builder.start_node(SyntaxKind::BLANK_LINE.into());
2355            builder.token(SyntaxKind::BLANK_LINE.into(), "\n");
2356            builder.finish_node();
2357        }
2358        emit_table_caption(builder, lines, cap_start, cap_end, config);
2359    }
2360
2361    builder.finish_node(); // MultilineTable
2362
2363    // Calculate lines consumed
2364    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2365    let table_end = if let Some((_, cap_end)) = caption_after {
2366        cap_end
2367    } else {
2368        end_pos
2369    };
2370
2371    Some(table_end - table_start)
2372}
2373
2374/// Extract cell contents from first line only (for CST emission).
2375/// Multi-line content will be in continuation TEXT tokens.
2376fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2377    let (line_content, _) = strip_newline(line);
2378    let mut cells = Vec::new();
2379
2380    for column in columns.iter() {
2381        // Extract FULL text for this column (including whitespace)
2382        let cell_text = if column.end <= line_content.len() {
2383            &line_content[column.start..column.end]
2384        } else if column.start < line_content.len() {
2385            &line_content[column.start..]
2386        } else {
2387            ""
2388        };
2389
2390        cells.push(cell_text.to_string());
2391    }
2392
2393    cells
2394}
2395
2396/// Emit a multiline table row with inline parsing (Phase 7.1).
2397fn emit_multiline_table_row(
2398    builder: &mut GreenNodeBuilder<'static>,
2399    lines: &[&str],
2400    columns: &[Column],
2401    kind: SyntaxKind,
2402    config: &Config,
2403) {
2404    if lines.is_empty() {
2405        return;
2406    }
2407
2408    // Extract cell contents from first line only (for CST losslessness)
2409    let first_line = lines[0];
2410    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2411
2412    builder.start_node(kind.into());
2413
2414    // Emit first line with TABLE_CELL nodes
2415    let (trimmed, newline_str) = strip_newline(first_line);
2416    let mut current_pos = 0;
2417
2418    for (col_idx, column) in columns.iter().enumerate() {
2419        let cell_text = &cell_contents[col_idx];
2420        let cell_start = column.start.min(trimmed.len());
2421        let cell_end = column.end.min(trimmed.len());
2422
2423        // Emit whitespace before cell
2424        if current_pos < cell_start {
2425            builder.token(
2426                SyntaxKind::WHITESPACE.into(),
2427                &trimmed[current_pos..cell_start],
2428            );
2429        }
2430
2431        // Emit cell with inline parsing (first line content only)
2432        emit_table_cell(builder, cell_text, config);
2433
2434        current_pos = cell_end;
2435    }
2436
2437    // Emit trailing whitespace
2438    if current_pos < trimmed.len() {
2439        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2440    }
2441
2442    // Emit newline
2443    if !newline_str.is_empty() {
2444        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2445    }
2446
2447    // Emit continuation lines as TEXT to preserve exact line structure
2448    for line in lines.iter().skip(1) {
2449        emit_line_tokens(builder, line);
2450    }
2451
2452    builder.finish_node();
2453}
2454
2455#[cfg(test)]
2456mod multiline_table_tests {
2457    use super::*;
2458    use crate::syntax::SyntaxNode;
2459
2460    #[test]
2461    fn test_multiline_separator_detection() {
2462        assert!(
2463            try_parse_multiline_separator(
2464                "-------------------------------------------------------------"
2465            )
2466            .is_some()
2467        );
2468        assert!(try_parse_multiline_separator("---").is_some());
2469        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces
2470        assert!(try_parse_multiline_separator("--").is_none()); // too short
2471        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
2472        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
2473    }
2474
2475    #[test]
2476    fn test_basic_multiline_table() {
2477        let input = vec![
2478            "-------------------------------------------------------------",
2479            " Centered   Default           Right Left",
2480            "  Header    Aligned         Aligned Aligned",
2481            "----------- ------- --------------- -------------------------",
2482            "   First    row                12.0 Example of a row that",
2483            "                                    spans multiple lines.",
2484            "",
2485            "  Second    row                 5.0 Here's another one.",
2486            "-------------------------------------------------------------",
2487            "",
2488        ];
2489
2490        let mut builder = GreenNodeBuilder::new();
2491        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2492
2493        assert!(result.is_some());
2494        assert_eq!(result.unwrap(), 9);
2495    }
2496
2497    #[test]
2498    fn test_multiline_table_headerless() {
2499        let input = vec![
2500            "----------- ------- --------------- -------------------------",
2501            "   First    row                12.0 Example of a row that",
2502            "                                    spans multiple lines.",
2503            "",
2504            "  Second    row                 5.0 Here's another one.",
2505            "----------- ------- --------------- -------------------------",
2506            "",
2507        ];
2508
2509        let mut builder = GreenNodeBuilder::new();
2510        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2511
2512        assert!(result.is_some());
2513        assert_eq!(result.unwrap(), 6);
2514    }
2515
2516    #[test]
2517    fn test_multiline_table_headerless_single_line_is_not_multiline() {
2518        let input = vec![
2519            "-------     ------ ----------   -------",
2520            "     12     12        12             12",
2521            "-------     ------ ----------   -------",
2522            "",
2523            "Not part of table.",
2524            "",
2525        ];
2526
2527        let mut builder = GreenNodeBuilder::new();
2528        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2529
2530        assert!(result.is_none());
2531    }
2532
2533    #[test]
2534    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
2535        let input = vec![
2536            "----------  ---------  -----------  ---------------------------",
2537            "   First    row               12.0  Example of a row that spans",
2538            "                                    multiple lines.",
2539            "----------  ---------  -----------  ---------------------------",
2540            "",
2541        ];
2542
2543        let mut builder = GreenNodeBuilder::new();
2544        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2545
2546        assert!(result.is_some());
2547        assert_eq!(result.unwrap(), 4);
2548    }
2549
2550    #[test]
2551    fn test_multiline_table_with_caption() {
2552        let input = vec![
2553            "-------------------------------------------------------------",
2554            " Col1       Col2",
2555            "----------- -------",
2556            "   A        B",
2557            "",
2558            "-------------------------------------------------------------",
2559            "",
2560            "Table: Here's the caption.",
2561            "",
2562        ];
2563
2564        let mut builder = GreenNodeBuilder::new();
2565        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2566
2567        assert!(result.is_some());
2568        // table (6 lines) + blank + caption
2569        assert_eq!(result.unwrap(), 8);
2570    }
2571
2572    #[test]
2573    fn test_multiline_table_single_row() {
2574        let input = vec![
2575            "---------------------------------------------",
2576            " Header1    Header2",
2577            "----------- -----------",
2578            "   Data     More data",
2579            "",
2580            "---------------------------------------------",
2581            "",
2582        ];
2583
2584        let mut builder = GreenNodeBuilder::new();
2585        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2586
2587        assert!(result.is_some());
2588        assert_eq!(result.unwrap(), 6);
2589    }
2590
2591    #[test]
2592    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
2593        let input = vec![
2594            "- - - - -",
2595            "Third section with underscores.",
2596            "",
2597            "_____",
2598            "",
2599            "> Quote before rule",
2600            ">",
2601            "> ***",
2602            ">",
2603            "> Quote after rule",
2604            "",
2605            "Final paragraph.",
2606            "",
2607            "Here's a horizontal rule:",
2608            "",
2609            "---",
2610            "Text directly after the horizontal rule.",
2611            "",
2612        ];
2613
2614        let mut builder = GreenNodeBuilder::new();
2615        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2616
2617        assert!(result.is_none());
2618    }
2619
2620    #[test]
2621    fn test_not_multiline_table() {
2622        // Simple table should not be parsed as multiline
2623        let input = vec![
2624            "  Right     Left     Center     Default",
2625            "-------     ------ ----------   -------",
2626            "     12     12        12            12",
2627            "",
2628        ];
2629
2630        let mut builder = GreenNodeBuilder::new();
2631        let result = try_parse_multiline_table(&input, 0, &mut builder, &Config::default());
2632
2633        // Should not parse because first line isn't a full-width separator
2634        assert!(result.is_none());
2635    }
2636
2637    // Phase 7.1: Unit tests for emit_table_cell() helper
2638    #[test]
2639    fn test_emit_table_cell_plain_text() {
2640        let mut builder = GreenNodeBuilder::new();
2641        emit_table_cell(&mut builder, "Cell", &Config::default());
2642        let green = builder.finish();
2643        let node = SyntaxNode::new_root(green);
2644
2645        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2646        assert_eq!(node.text(), "Cell");
2647
2648        // Should have TEXT child
2649        let children: Vec<_> = node.children_with_tokens().collect();
2650        assert_eq!(children.len(), 1);
2651        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
2652    }
2653
2654    #[test]
2655    fn test_emit_table_cell_with_emphasis() {
2656        let mut builder = GreenNodeBuilder::new();
2657        emit_table_cell(&mut builder, "*italic*", &Config::default());
2658        let green = builder.finish();
2659        let node = SyntaxNode::new_root(green);
2660
2661        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2662        assert_eq!(node.text(), "*italic*");
2663
2664        // Should have EMPHASIS child
2665        let children: Vec<_> = node.children().collect();
2666        assert_eq!(children.len(), 1);
2667        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
2668    }
2669
2670    #[test]
2671    fn test_emit_table_cell_with_code() {
2672        let mut builder = GreenNodeBuilder::new();
2673        emit_table_cell(&mut builder, "`code`", &Config::default());
2674        let green = builder.finish();
2675        let node = SyntaxNode::new_root(green);
2676
2677        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2678        assert_eq!(node.text(), "`code`");
2679
2680        // Should have CODE_SPAN child
2681        let children: Vec<_> = node.children().collect();
2682        assert_eq!(children.len(), 1);
2683        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
2684    }
2685
2686    #[test]
2687    fn test_emit_table_cell_with_link() {
2688        let mut builder = GreenNodeBuilder::new();
2689        emit_table_cell(&mut builder, "[text](url)", &Config::default());
2690        let green = builder.finish();
2691        let node = SyntaxNode::new_root(green);
2692
2693        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2694        assert_eq!(node.text(), "[text](url)");
2695
2696        // Should have LINK child
2697        let children: Vec<_> = node.children().collect();
2698        assert_eq!(children.len(), 1);
2699        assert_eq!(children[0].kind(), SyntaxKind::LINK);
2700    }
2701
2702    #[test]
2703    fn test_emit_table_cell_with_strong() {
2704        let mut builder = GreenNodeBuilder::new();
2705        emit_table_cell(&mut builder, "**bold**", &Config::default());
2706        let green = builder.finish();
2707        let node = SyntaxNode::new_root(green);
2708
2709        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2710        assert_eq!(node.text(), "**bold**");
2711
2712        // Should have STRONG child
2713        let children: Vec<_> = node.children().collect();
2714        assert_eq!(children.len(), 1);
2715        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
2716    }
2717
2718    #[test]
2719    fn test_emit_table_cell_mixed_inline() {
2720        let mut builder = GreenNodeBuilder::new();
2721        emit_table_cell(&mut builder, "Text **bold** and `code`", &Config::default());
2722        let green = builder.finish();
2723        let node = SyntaxNode::new_root(green);
2724
2725        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2726        assert_eq!(node.text(), "Text **bold** and `code`");
2727
2728        // Should have multiple children: TEXT, STRONG, TEXT, CODE_SPAN
2729        let children: Vec<_> = node.children_with_tokens().collect();
2730        assert!(children.len() >= 4);
2731
2732        // Check some expected types
2733        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
2734        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
2735    }
2736
2737    #[test]
2738    fn test_emit_table_cell_empty() {
2739        let mut builder = GreenNodeBuilder::new();
2740        emit_table_cell(&mut builder, "", &Config::default());
2741        let green = builder.finish();
2742        let node = SyntaxNode::new_root(green);
2743
2744        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2745        assert_eq!(node.text(), "");
2746
2747        // Empty cell should have no children
2748        let children: Vec<_> = node.children_with_tokens().collect();
2749        assert_eq!(children.len(), 0);
2750    }
2751
2752    #[test]
2753    fn test_emit_table_cell_escaped_pipe() {
2754        let mut builder = GreenNodeBuilder::new();
2755        emit_table_cell(&mut builder, r"A \| B", &Config::default());
2756        let green = builder.finish();
2757        let node = SyntaxNode::new_root(green);
2758
2759        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
2760        // The escaped pipe should be preserved
2761        assert_eq!(node.text(), r"A \| B");
2762    }
2763}