Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
9use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
10use crate::parser::utils::inline_emission;
11
/// Horizontal alignment assigned to a table column.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Alignment {
    /// Left alignment.
    Left,
    /// Right alignment.
    Right,
    /// Centered.
    Center,
    /// No explicit alignment.
    Default,
}
19
20/// Column information extracted from the separator line.
21#[derive(Debug, Clone)]
22pub(crate) struct Column {
23    /// Start position (byte index) in the line
24    start: usize,
25    /// End position (byte index) in the line
26    end: usize,
27    /// Column alignment
28    alignment: Alignment,
29}
30
31/// Try to detect if a line is a table separator line.
32/// Returns Some(column positions) if it's a valid separator.
33pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
34    let trimmed = line.trim_start();
35    // Strip trailing newline if present (CRLF or LF)
36    let (trimmed, newline_str) = strip_newline(trimmed);
37    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
38
39    // Must have leading spaces <= 3 to not be a code block
40    if leading_spaces > 3 {
41        return None;
42    }
43
44    // Simple tables only use dashed separators.
45    if trimmed.contains('*') || trimmed.contains('_') {
46        return None;
47    }
48
49    // Must contain at least one dash
50    if !trimmed.contains('-') {
51        return None;
52    }
53
54    // A separator line consists of dashes and spaces
55    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
56        return None;
57    }
58
59    // Must not be a horizontal rule.
60    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
61    if dash_groups.len() <= 1 {
62        return None;
63    }
64
65    // Extract column positions from dash groups
66    let columns = extract_columns(trimmed, leading_spaces);
67
68    if columns.is_empty() {
69        return None;
70    }
71
72    Some(columns)
73}
74
75/// Extract column positions from a separator line.
76fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
77    let mut columns = Vec::new();
78    let mut in_dashes = false;
79    let mut col_start = 0;
80
81    for (i, ch) in separator.char_indices() {
82        match ch {
83            '-' if !in_dashes => {
84                col_start = i + offset;
85                in_dashes = true;
86            }
87            ' ' if in_dashes => {
88                columns.push(Column {
89                    start: col_start,
90                    end: i + offset,
91                    alignment: Alignment::Default, // Will be determined later
92                });
93                in_dashes = false;
94            }
95            _ => {}
96        }
97    }
98
99    // Handle last column
100    if in_dashes {
101        columns.push(Column {
102            start: col_start,
103            end: separator.len() + offset,
104            alignment: Alignment::Default,
105        });
106    }
107
108    columns
109}
110
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Separator lines are ASCII, so their column boundaries are the same in
/// characters and bytes. Other rows may hold multibyte characters, so the
/// boundary has to be translated before it can be used to slice `line`.
/// Offsets at or beyond the end of the line map to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
121
/// Recognise a table-caption marker at the start of `line`.
///
/// Returns `Some((prefix_len, rest))`, where `prefix_len` is the byte length
/// of the leading whitespace plus the marker and `rest` is everything after
/// the marker. Accepted markers are `Table:`, `table:` and a bare `:`.
/// Returns `None` when the line is indented by more than 3 bytes or has no
/// marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation belongs to a code block, not a caption.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows it (Pandoc-style), so
    // that fenced-div fences such as ":::" are not taken for captions.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
150
151/// Check if a line could be the start of a table caption.
152fn is_table_caption_start(line: &str) -> bool {
153    try_parse_caption_prefix(line).is_some()
154}
155
/// Report whether `line`, ignoring leading whitespace, starts with exactly
/// one `:` (a single colon not immediately followed by another colon).
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
160
161fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
162    let Some((_, rest)) = try_parse_caption_prefix(line) else {
163        return false;
164    };
165    let trimmed = rest.trim_start();
166    trimmed.starts_with("```") || trimmed.starts_with("~~~")
167}
168
/// Report whether `line` is a fenced-div fence: optional leading whitespace,
/// then three or more colons, followed by either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    // ':' is one byte, so the byte difference equals the colon count.
    let colon_count = body.len() - after_colons.len();

    colon_count >= 3
        && after_colons
            .chars()
            .next()
            .map_or(true, char::is_whitespace)
}
178
179fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
180    if !is_table_caption_start(lines[pos]) {
181        return false;
182    }
183
184    if is_bare_colon_caption_start(lines[pos])
185        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
186    {
187        return false;
188    }
189
190    // Avoid stealing definition-list definitions (":   ...") as table captions.
191    if is_bare_colon_caption_start(lines[pos])
192        && pos > 0
193        && !lines[pos - 1].trim().is_empty()
194        && !line_is_fenced_div_fence(lines[pos - 1])
195    {
196        return false;
197    }
198    true
199}
200
201/// Check if a line could be the start of a grid table.
202/// Grid tables start with a separator line like +---+---+ or +===+===+
203fn is_grid_table_start(line: &str) -> bool {
204    try_parse_grid_separator(line).is_some()
205}
206
207/// Check if a line could be the start of a multiline table.
208/// Multiline tables start with either:
209/// - A full-width dash separator (----)
210/// - A column separator with dashes and spaces (---- ---- ----)
211fn is_multiline_table_start(line: &str) -> bool {
212    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
213}
214
215/// Check if there's a table following a potential caption at this position.
216/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
217pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
218    if caption_pos >= lines.len() {
219        return false;
220    }
221
222    // Caption must start with a caption prefix
223    if !is_valid_caption_start_before_table(lines, caption_pos) {
224        return false;
225    }
226
227    let mut pos = caption_pos + 1;
228
229    // Skip continuation lines of caption (non-blank lines).
230    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
231    // must not be folded into the caption.
232    while pos < lines.len()
233        && !lines[pos].trim().is_empty()
234        && !line_is_fenced_div_fence(lines[pos])
235    {
236        // If we hit a table separator, we found a table
237        if try_parse_table_separator(lines[pos]).is_some() {
238            return true;
239        }
240        pos += 1;
241    }
242
243    // Skip one blank line
244    if pos < lines.len() && lines[pos].trim().is_empty() {
245        pos += 1;
246    }
247
248    // Check for table at next position
249    if pos < lines.len() {
250        let line = lines[pos];
251
252        // Check for grid table start (+---+---+ or +===+===+)
253        if is_grid_table_start(line) {
254            return true;
255        }
256
257        // Check for multiline table start (---- or ---- ---- ----)
258        if is_multiline_table_start(line) {
259            return true;
260        }
261
262        // Could be a separator line (simple/pipe table, headerless)
263        if try_parse_table_separator(line).is_some() {
264            return true;
265        }
266
267        // Or could be a header line followed by separator (simple/pipe table with header)
268        if pos + 1 < lines.len() && !line.trim().is_empty() {
269            let next_line = lines[pos + 1];
270            if try_parse_table_separator(next_line).is_some()
271                || try_parse_pipe_separator(next_line).is_some()
272            {
273                return true;
274            }
275        }
276    }
277
278    false
279}
280
281fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
282    if start >= lines.len() || !is_table_caption_start(lines[start]) {
283        return None;
284    }
285    let mut end = start + 1;
286    while end < lines.len()
287        && !lines[end].trim().is_empty()
288        && !line_is_fenced_div_fence(lines[end])
289    {
290        end += 1;
291    }
292    Some((start, end))
293}
294
295/// Find caption before table (if any).
296/// Returns (caption_start, caption_end) positions, or None.
297fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
298    if table_start == 0 {
299        return None;
300    }
301
302    // Look backward for a caption
303    // Caption must be immediately before table (with possible blank line between)
304    let mut pos = table_start - 1;
305
306    // Skip one blank line if present
307    if lines[pos].trim().is_empty() {
308        if pos == 0 {
309            return None;
310        }
311        pos -= 1;
312    }
313
314    // Now pos points to the last non-blank line before the table
315    // This could be the last line of a multiline caption, or a single-line caption
316    let caption_end = pos + 1; // End is exclusive
317
318    // If this line is NOT a caption start, it might be a continuation line
319    // Scan backward through non-blank lines to find the caption start
320    if !is_valid_caption_start_before_table(lines, pos) {
321        // Not a caption start - check if there's a caption start above
322        let mut scan_pos = pos;
323        while scan_pos > 0 {
324            scan_pos -= 1;
325            let line = lines[scan_pos];
326
327            // If we hit a blank line or fenced-div fence, we've gone too far
328            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
329                return None;
330            }
331
332            // If we find a caption start, this is the beginning of the multiline caption
333            if is_valid_caption_start_before_table(lines, scan_pos) {
334                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
335                    return None;
336                }
337                if previous_nonblank_looks_like_table(lines, scan_pos) {
338                    return None;
339                }
340                return Some((scan_pos, caption_end));
341            }
342        }
343        // Scanned to beginning without finding caption start
344        None
345    } else {
346        if pos > 0 && !lines[pos - 1].trim().is_empty() {
347            return None;
348        }
349        if previous_nonblank_looks_like_table(lines, pos) {
350            return None;
351        }
352        // This line is a caption start - return the range
353        Some((pos, caption_end))
354    }
355}
356
357fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
358    if pos == 0 {
359        return false;
360    }
361    let mut i = pos;
362    while i > 0 {
363        i -= 1;
364        let line = lines[i].trim();
365        if line.is_empty() {
366            continue;
367        }
368        return line_looks_like_table_syntax(line);
369    }
370    false
371}
372
373fn line_looks_like_table_syntax(line: &str) -> bool {
374    if line.starts_with('|') && line.matches('|').count() >= 2 {
375        return true;
376    }
377    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
378        return true;
379    }
380    try_parse_table_separator(line).is_some()
381        || try_parse_pipe_separator(line).is_some()
382        || try_parse_grid_separator(line).is_some()
383}
384
385/// Find caption after table (if any).
386/// Returns (caption_start, caption_end) positions, or None.
387fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
388    if table_end >= lines.len() {
389        return None;
390    }
391
392    let mut pos = table_end;
393
394    // Skip one blank line if present
395    if pos < lines.len() && lines[pos].trim().is_empty() {
396        pos += 1;
397    }
398
399    if pos >= lines.len() {
400        return None;
401    }
402
403    // Check if this line is a caption
404    if is_table_caption_start(lines[pos]) {
405        let caption_start = pos;
406        // Find end of caption (continues until blank line or fenced-div fence)
407        let mut caption_end = caption_start + 1;
408        while caption_end < lines.len()
409            && !lines[caption_end].trim().is_empty()
410            && !line_is_fenced_div_fence(lines[caption_end])
411        {
412            caption_end += 1;
413        }
414        Some((caption_start, caption_end))
415    } else {
416        None
417    }
418}
419
420/// Emit a table caption node.
421/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
422/// the text ends with a balanced `{...}` block, lift it into a structural
423/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
424/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
425/// the id).
426fn emit_caption_line_text(
427    builder: &mut GreenNodeBuilder<'static>,
428    text_with_newline: &str,
429    config: &ParserOptions,
430    lift_trailing_attrs: bool,
431) {
432    let (text, newline_str) = strip_newline(text_with_newline);
433
434    if lift_trailing_attrs
435        && !text.is_empty()
436        && let Some((_attrs, before_attrs, start_brace_pos)) =
437            try_parse_trailing_attributes_with_pos(text)
438    {
439        let trimmed_len = text.trim_end().len();
440        let space = &text[before_attrs.len()..start_brace_pos];
441        let raw_attrs = &text[start_brace_pos..trimmed_len];
442        let trailing_ws = &text[trimmed_len..];
443
444        if !before_attrs.is_empty() {
445            inline_emission::emit_inlines(builder, before_attrs, config, false);
446        }
447        if !space.is_empty() {
448            builder.token(SyntaxKind::WHITESPACE.into(), space);
449        }
450        builder.start_node(SyntaxKind::ATTRIBUTE.into());
451        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
452        builder.finish_node();
453        if !trailing_ws.is_empty() {
454            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
455        }
456        if !newline_str.is_empty() {
457            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
458        }
459        return;
460    }
461
462    if !text.is_empty() {
463        inline_emission::emit_inlines(builder, text, config, false);
464    }
465    if !newline_str.is_empty() {
466        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
467    }
468}
469
470fn emit_table_caption(
471    builder: &mut GreenNodeBuilder<'static>,
472    lines: &[&str],
473    start: usize,
474    end: usize,
475    config: &ParserOptions,
476) {
477    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
478
479    let last_idx = (end - start).saturating_sub(1);
480
481    for (i, line) in lines[start..end].iter().enumerate() {
482        let lift_attrs = i == last_idx;
483        if i == 0 {
484            // First line - parse and emit prefix separately
485            let trimmed = line.trim_start();
486            let leading_ws_len = line.len() - trimmed.len();
487
488            // Emit leading whitespace if present
489            if leading_ws_len > 0 {
490                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
491            }
492
493            // Check for caption prefix and emit separately
494            // Calculate where the prefix ends (after trimmed content)
495            let prefix_and_rest = if line.ends_with('\n') {
496                &line[leading_ws_len..line.len() - 1] // Exclude newline
497            } else {
498                &line[leading_ws_len..]
499            };
500
501            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
502                (7, "Table: ")
503            } else if prefix_and_rest.starts_with("table: ") {
504                (7, "table: ")
505            } else if prefix_and_rest.starts_with(": ") {
506                (2, ": ")
507            } else if prefix_and_rest.starts_with(':') {
508                (1, ":")
509            } else {
510                (0, "")
511            };
512
513            if prefix_len > 0 {
514                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
515
516                // Emit rest of line after prefix
517                let rest_start = leading_ws_len + prefix_len;
518                if rest_start < line.len() {
519                    emit_caption_line_text(builder, &line[rest_start..], config, lift_attrs);
520                }
521            } else {
522                // No recognized prefix, emit whole trimmed line
523                emit_caption_line_text(builder, &line[leading_ws_len..], config, lift_attrs);
524            }
525        } else {
526            // Continuation lines - emit with inline parsing (attrs only on last line).
527            emit_caption_line_text(builder, line, config, lift_attrs);
528        }
529    }
530
531    builder.finish_node(); // TABLE_CAPTION
532}
533
534/// Emit a table cell with inline content parsing.
535/// This is the core helper for Phase 7.1 table inline parsing migration.
536fn emit_table_cell(
537    builder: &mut GreenNodeBuilder<'static>,
538    cell_text: &str,
539    config: &ParserOptions,
540) {
541    builder.start_node(SyntaxKind::TABLE_CELL.into());
542
543    // Parse inline content within the cell
544    if !cell_text.is_empty() {
545        inline_emission::emit_inlines(builder, cell_text, config, false);
546    }
547
548    builder.finish_node(); // TABLE_CELL
549}
550
551/// Determine column alignments based on separator and optional header.
552fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
553    for col in columns.iter_mut() {
554        let sep_slice = &separator_line[col.start..col.end];
555
556        if let Some(header) = header_line {
557            let header_start = column_offset_to_byte_index(header, col.start);
558            let header_end = column_offset_to_byte_index(header, col.end);
559
560            // Extract header text for this column
561            let header_text = if header_start < header_end {
562                header[header_start..header_end].trim()
563            } else if header_start < header.len() {
564                header[header_start..].trim()
565            } else {
566                ""
567            };
568
569            if header_text.is_empty() {
570                col.alignment = Alignment::Default;
571                continue;
572            }
573
574            // Find where the header text starts and ends within the column
575            let header_in_col = &header[header_start..header_end];
576            let text_start = header_in_col.len() - header_in_col.trim_start().len();
577            let text_end = header_in_col.trim_end().len() + text_start;
578
579            // Check dash alignment relative to text
580            let dashes_start = 0; // Dashes start at beginning of sep_slice
581            let dashes_end = sep_slice.len();
582
583            let flush_left = dashes_start == text_start;
584            let flush_right = dashes_end == text_end;
585
586            col.alignment = match (flush_left, flush_right) {
587                (true, true) => Alignment::Default,
588                (true, false) => Alignment::Left,
589                (false, true) => Alignment::Right,
590                (false, false) => Alignment::Center,
591            };
592        } else {
593            // Without header, alignment based on first row (we'll handle this later)
594            col.alignment = Alignment::Default;
595        }
596    }
597}
598
599/// Try to parse a simple table starting at the given position.
600/// Returns the number of lines consumed if successful.
601pub(crate) fn try_parse_simple_table(
602    lines: &[&str],
603    start_pos: usize,
604    builder: &mut GreenNodeBuilder<'static>,
605    config: &ParserOptions,
606) -> Option<usize> {
607    log::trace!("try_parse_simple_table at line {}", start_pos + 1);
608
609    if start_pos >= lines.len() {
610        return None;
611    }
612
613    // Look for a separator line
614    let separator_pos = find_separator_line(lines, start_pos)?;
615    log::trace!("  found separator at line {}", separator_pos + 1);
616
617    let separator_line = lines[separator_pos];
618    let mut columns = try_parse_table_separator(separator_line)?;
619
620    // Determine if there's a header (separator not at start)
621    let has_header = separator_pos > start_pos;
622    let header_line = if has_header {
623        Some(lines[separator_pos - 1])
624    } else {
625        None
626    };
627
628    // Determine alignments
629    determine_alignments(&mut columns, separator_line, header_line);
630
631    // Find table end (blank line or end of input)
632    let end_pos = find_table_end(lines, separator_pos + 1);
633
634    // Must have at least one data row (or it's just a separator)
635    let data_rows = end_pos - separator_pos - 1;
636
637    if data_rows == 0 {
638        return None;
639    }
640
641    // Check for caption before table
642    let caption_before = find_caption_before_table(lines, start_pos);
643
644    // Check for caption after table
645    let caption_after = if caption_before.is_some() {
646        None
647    } else {
648        find_caption_after_table(lines, end_pos)
649    };
650
651    // Build the table
652    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
653
654    // Emit caption before if present
655    if let Some((cap_start, cap_end)) = caption_before {
656        emit_table_caption(builder, lines, cap_start, cap_end, config);
657
658        // Emit blank line between caption and table if present
659        if cap_end < start_pos {
660            for line in lines.iter().take(start_pos).skip(cap_end) {
661                if line.trim().is_empty() {
662                    builder.start_node(SyntaxKind::BLANK_LINE.into());
663                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
664                    builder.finish_node();
665                }
666            }
667        }
668    }
669
670    // Emit header if present
671    if has_header {
672        emit_table_row(
673            builder,
674            lines[separator_pos - 1],
675            &columns,
676            SyntaxKind::TABLE_HEADER,
677            config,
678        );
679    }
680
681    // Emit separator
682    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
683    emit_line_tokens(builder, separator_line);
684    builder.finish_node();
685
686    // Emit data rows
687    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
688        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
689    }
690
691    // Emit caption after if present
692    if let Some((cap_start, cap_end)) = caption_after {
693        // Emit blank line before caption if needed
694        if cap_start > end_pos {
695            for line in lines.iter().take(cap_start).skip(end_pos) {
696                if line.trim().is_empty() {
697                    builder.start_node(SyntaxKind::BLANK_LINE.into());
698                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
699                    builder.finish_node();
700                }
701            }
702        }
703        emit_table_caption(builder, lines, cap_start, cap_end, config);
704    }
705
706    builder.finish_node(); // SimpleTable
707
708    // Calculate lines consumed (including captions)
709    let table_start = if let Some((cap_start, _)) = caption_before {
710        cap_start
711    } else if has_header {
712        separator_pos - 1
713    } else {
714        separator_pos
715    };
716
717    let table_end = if let Some((_, cap_end)) = caption_after {
718        cap_end
719    } else {
720        end_pos
721    };
722
723    let lines_consumed = table_end - table_start;
724
725    Some(lines_consumed)
726}
727
728/// Find the position of a separator line starting from pos.
729fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
730    log::trace!("  find_separator_line from line {}", start_pos + 1);
731
732    // Check first line
733    log::trace!("    checking first line: {:?}", lines[start_pos]);
734    if try_parse_table_separator(lines[start_pos]).is_some() {
735        log::trace!("    separator found at first line");
736        return Some(start_pos);
737    }
738
739    // Check second line (for table with header)
740    if start_pos + 1 < lines.len()
741        && !lines[start_pos].trim().is_empty()
742        && try_parse_table_separator(lines[start_pos + 1]).is_some()
743    {
744        return Some(start_pos + 1);
745    }
746    None
747}
748
749/// Find where the table ends (first blank line or end of input).
750fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
751    for i in start_pos..lines.len() {
752        if lines[i].trim().is_empty() {
753            return i;
754        }
755        // Check if this could be a closing separator
756        if try_parse_table_separator(lines[i]).is_some() {
757            // Check if next line is blank or end
758            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
759                return i + 1;
760            }
761        }
762    }
763    lines.len()
764}
765
766/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
767/// Uses column boundaries from the separator line to extract cells.
768fn emit_table_row(
769    builder: &mut GreenNodeBuilder<'static>,
770    line: &str,
771    columns: &[Column],
772    row_kind: SyntaxKind,
773    config: &ParserOptions,
774) {
775    builder.start_node(row_kind.into());
776
777    let (line_without_newline, newline_str) = strip_newline(line);
778
779    // Emit leading whitespace if present
780    let trimmed = line_without_newline.trim_start();
781    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
782    if leading_ws_len > 0 {
783        builder.token(
784            SyntaxKind::WHITESPACE.into(),
785            &line_without_newline[..leading_ws_len],
786        );
787    }
788
789    // Track where we are in the line (for losslessness)
790    let mut current_pos = 0;
791
792    // Extract and emit cells based on column boundaries
793    for col in columns.iter() {
794        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
795        let cell_start = if col.start >= leading_ws_len {
796            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
797        } else {
798            0
799        };
800
801        let cell_end = if col.end >= leading_ws_len {
802            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
803        } else {
804            0
805        };
806
807        // Extract cell text from column bounds. When the column lies entirely
808        // before the trimmed content (col.end <= leading_ws_len) both bounds
809        // clamp to 0; treat that as an empty cell rather than re-emitting the
810        // whole row.
811        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
812            &trimmed[cell_start..cell_end]
813        } else {
814            ""
815        };
816
817        let cell_content = cell_text.trim();
818        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
819
820        // Emit any whitespace from current position to start of cell content
821        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
822        if current_pos < content_abs_pos {
823            builder.token(
824                SyntaxKind::WHITESPACE.into(),
825                &trimmed[current_pos..content_abs_pos],
826            );
827        }
828
829        // Emit cell with inline parsing
830        emit_table_cell(builder, cell_content, config);
831
832        // Update current position to end of cell content
833        current_pos = content_abs_pos + cell_content.len();
834    }
835
836    // Emit any remaining whitespace after last cell
837    if current_pos < trimmed.len() {
838        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
839    }
840
841    // Emit newline if present
842    if !newline_str.is_empty() {
843        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
844    }
845
846    builder.finish_node();
847}
848
849// ============================================================================
850// Pipe Table Parsing
851// ============================================================================
852
853/// Check if a line is a pipe table separator line.
854/// Returns the column alignments if it's a valid separator.
855fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
856    let trimmed = line.trim();
857
858    // Must contain at least one pipe
859    if !trimmed.contains('|') && !trimmed.contains('+') {
860        return None;
861    }
862
863    // Split by pipes (or + for orgtbl variant)
864    let cells: Vec<&str> = if trimmed.contains('+') {
865        // Orgtbl variant: use + as separator in separator line
866        trimmed.split(['|', '+']).collect()
867    } else {
868        trimmed.split('|').collect()
869    };
870
871    let mut alignments = Vec::new();
872
873    for cell in cells {
874        let cell = cell.trim();
875
876        // Skip empty cells (from leading/trailing pipes)
877        if cell.is_empty() {
878            continue;
879        }
880
881        // Must be dashes with optional colons
882        let starts_colon = cell.starts_with(':');
883        let ends_colon = cell.ends_with(':');
884
885        // Remove colons to check if rest is all dashes
886        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
887
888        // Must have at least one dash
889        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
890            return None;
891        }
892
893        // Determine alignment from colon positions
894        let alignment = match (starts_colon, ends_colon) {
895            (true, true) => Alignment::Center,
896            (true, false) => Alignment::Left,
897            (false, true) => Alignment::Right,
898            (false, false) => Alignment::Default,
899        };
900
901        alignments.push(alignment);
902    }
903
904    // Must have at least one column
905    if alignments.is_empty() {
906        None
907    } else {
908        Some(alignments)
909    }
910}
911
/// Split a pipe table row into its trimmed cell texts.
///
/// A single leading `|` is ignored, and a trailing `|` does not produce an
/// extra empty cell. A backslash escapes the character that follows it, so
/// `\|` stays inside the current cell while `\\|` is an escaped backslash
/// followed by a real column separator. This is the same escape rule
/// `emit_pipe_table_row` uses when locating separators. Escape sequences are
/// kept verbatim in the returned cell text.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // Only a pipe in the very first position counts as the leading border.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and whatever it escapes together so the
                // escaped character is never interpreted as a separator.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // The remainder is empty when the row ended with a pipe; only a
    // non-empty remainder forms a final cell.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
959
960/// Emit a pipe table row with inline-parsed cells.
961/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
962fn emit_pipe_table_row(
963    builder: &mut GreenNodeBuilder<'static>,
964    line: &str,
965    row_kind: SyntaxKind,
966    config: &ParserOptions,
967) {
968    builder.start_node(row_kind.into());
969
970    let (line_without_newline, newline_str) = strip_newline(line);
971    let trimmed = line_without_newline.trim();
972
973    // Parse cell boundaries
974    let mut cell_starts = Vec::new();
975    let mut cell_ends = Vec::new();
976    let mut in_escape = false;
977
978    // Find all pipe positions (excluding escaped ones)
979    let mut pipe_positions = Vec::new();
980    for (i, ch) in trimmed.char_indices() {
981        if in_escape {
982            in_escape = false;
983            continue;
984        }
985        if ch == '\\' {
986            in_escape = true;
987            continue;
988        }
989        if ch == '|' {
990            pipe_positions.push(i);
991        }
992    }
993
994    // Determine cell boundaries based on pipe positions
995    if pipe_positions.is_empty() {
996        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
997        cell_starts.push(0);
998        cell_ends.push(trimmed.len());
999    } else {
1000        // Check if line starts with pipe
1001        let start_pipe = pipe_positions.first() == Some(&0);
1002        // Check if line ends with pipe
1003        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1004
1005        if start_pipe {
1006            // Skip first pipe
1007            for i in 1..pipe_positions.len() {
1008                cell_starts.push(pipe_positions[i - 1] + 1);
1009                cell_ends.push(pipe_positions[i]);
1010            }
1011            // Add last cell if there's no trailing pipe
1012            if !end_pipe {
1013                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1014                cell_ends.push(trimmed.len());
1015            }
1016        } else {
1017            // No leading pipe
1018            cell_starts.push(0);
1019            cell_ends.push(pipe_positions[0]);
1020
1021            for i in 1..pipe_positions.len() {
1022                cell_starts.push(pipe_positions[i - 1] + 1);
1023                cell_ends.push(pipe_positions[i]);
1024            }
1025
1026            // Add last cell if there's no trailing pipe
1027            if !end_pipe {
1028                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1029                cell_ends.push(trimmed.len());
1030            }
1031        }
1032    }
1033
1034    // Emit leading whitespace if present (before trim)
1035    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1036    if leading_ws_len > 0 {
1037        builder.token(
1038            SyntaxKind::WHITESPACE.into(),
1039            &line_without_newline[..leading_ws_len],
1040        );
1041    }
1042
1043    // Emit cells with pipes
1044    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1045        // Emit pipe before cell (except for first cell if no leading pipe)
1046        if *start > 0 {
1047            builder.token(SyntaxKind::TEXT.into(), "|");
1048        } else if idx == 0 && trimmed.starts_with('|') {
1049            // Leading pipe
1050            builder.token(SyntaxKind::TEXT.into(), "|");
1051        }
1052
1053        // Get cell content with its whitespace
1054        let cell_with_ws = &trimmed[*start..*end];
1055        let cell_content = cell_with_ws.trim();
1056
1057        // Emit leading whitespace within cell
1058        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1059        if !cell_leading_ws.is_empty() {
1060            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1061        }
1062
1063        // Emit cell with inline parsing
1064        emit_table_cell(builder, cell_content, config);
1065
1066        // Emit trailing whitespace within cell
1067        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1068        if cell_trailing_ws_start < cell_with_ws.len() {
1069            builder.token(
1070                SyntaxKind::WHITESPACE.into(),
1071                &cell_with_ws[cell_trailing_ws_start..],
1072            );
1073        }
1074    }
1075
1076    // Emit trailing pipe if present
1077    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1078        builder.token(SyntaxKind::TEXT.into(), "|");
1079    }
1080
1081    // Emit trailing whitespace after trim (before newline)
1082    let trailing_ws_start = leading_ws_len + trimmed.len();
1083    if trailing_ws_start < line_without_newline.len() {
1084        builder.token(
1085            SyntaxKind::WHITESPACE.into(),
1086            &line_without_newline[trailing_ws_start..],
1087        );
1088    }
1089
1090    // Emit newline
1091    if !newline_str.is_empty() {
1092        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1093    }
1094
1095    builder.finish_node();
1096}
1097
1098/// Try to parse a pipe table starting at the given position.
1099/// Returns the number of lines consumed if successful.
1100pub(crate) fn try_parse_pipe_table(
1101    lines: &[&str],
1102    start_pos: usize,
1103    builder: &mut GreenNodeBuilder<'static>,
1104    config: &ParserOptions,
1105) -> Option<usize> {
1106    if start_pos + 1 >= lines.len() {
1107        return None;
1108    }
1109
1110    // Check if this line is a caption followed by a table
1111    // If so, the actual table starts after the caption and blank line
1112    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1113        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1114        let mut pos = cap_end;
1115        while pos < lines.len() && lines[pos].trim().is_empty() {
1116            pos += 1;
1117        }
1118        (pos, Some((cap_start, cap_end)))
1119    } else {
1120        (start_pos, None)
1121    };
1122
1123    if actual_start + 1 >= lines.len() {
1124        return None;
1125    }
1126
1127    // First line should have pipes (potential header)
1128    let header_line = lines[actual_start];
1129    if !header_line.contains('|') {
1130        return None;
1131    }
1132
1133    // Second line should be separator
1134    let separator_line = lines[actual_start + 1];
1135    let alignments = try_parse_pipe_separator(separator_line)?;
1136
1137    // Parse header cells
1138    let header_cells = parse_pipe_table_row(header_line);
1139
1140    // Number of columns should match (approximately - be lenient)
1141    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1142        // Only fail if very different
1143        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1144            return None;
1145        }
1146    }
1147
1148    // Find table end (first blank line or end of input)
1149    let mut end_pos = actual_start + 2;
1150    while end_pos < lines.len() {
1151        let line = lines[end_pos];
1152        if line.trim().is_empty() {
1153            break;
1154        }
1155        // Row should have pipes
1156        if !line.contains('|') {
1157            break;
1158        }
1159        end_pos += 1;
1160    }
1161
1162    // Must have at least one data row
1163    if end_pos <= actual_start + 2 {
1164        return None;
1165    }
1166
1167    // Check for caption before table (only if we didn't already detect it)
1168    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1169
1170    // Check for caption after table
1171    let caption_after = if caption_before.is_some() {
1172        None
1173    } else {
1174        find_caption_after_table(lines, end_pos)
1175    };
1176
1177    // Build the pipe table
1178    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1179
1180    // Emit caption before if present
1181    if let Some((cap_start, cap_end)) = caption_before {
1182        emit_table_caption(builder, lines, cap_start, cap_end, config);
1183        // Emit blank line between caption and table if present
1184        if cap_end < actual_start {
1185            for line in lines.iter().take(actual_start).skip(cap_end) {
1186                if line.trim().is_empty() {
1187                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1188                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1189                    builder.finish_node();
1190                }
1191            }
1192        }
1193    }
1194
1195    // Emit header row with inline-parsed cells
1196    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1197
1198    // Emit separator
1199    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1200    emit_line_tokens(builder, separator_line);
1201    builder.finish_node();
1202
1203    // Emit data rows with inline-parsed cells
1204    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1205        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1206    }
1207
1208    // Emit caption after if present
1209    if let Some((cap_start, cap_end)) = caption_after {
1210        // Emit blank line before caption if needed
1211        if cap_start > end_pos {
1212            for line in lines.iter().take(cap_start).skip(end_pos) {
1213                if line.trim().is_empty() {
1214                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1215                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1216                    builder.finish_node();
1217                }
1218            }
1219        }
1220        emit_table_caption(builder, lines, cap_start, cap_end, config);
1221    }
1222
1223    builder.finish_node(); // PipeTable
1224
1225    // Calculate lines consumed
1226    let table_start = caption_before
1227        .map(|(start, _)| start)
1228        .unwrap_or(actual_start);
1229    let table_end = if let Some((_, cap_end)) = caption_after {
1230        cap_end
1231    } else {
1232        end_pos
1233    };
1234
1235    Some(table_end - table_start)
1236}
1237
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Simple tables -------------------------------------------------

    #[test]
    fn test_separator_detection() {
        // Several dash groups separated by spaces form a table separator.
        for line in [
            "------- ------ ----------   -------",
            "  ---  ---  ---",
            "--- --- ---",
        ] {
            assert!(try_parse_table_separator(line).is_some());
        }
        // A single dash group is a horizontal rule, not a separator.
        assert!(try_parse_table_separator("-------").is_none());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let consumed = try_parse_simple_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let consumed = try_parse_simple_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // sep + 2 rows
        assert_eq!(consumed, Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(try_parse_caption_prefix(caption).is_some());
        }
        // A bare colon has no content; plain text has no prefix.
        for not_caption in [":", "Not a caption"] {
            assert!(try_parse_caption_prefix(not_caption).is_none());
        }
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let tree = crate::parse("Term\n: ```\n  code\n  ```\n", None);
        let has_kind = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            has_kind(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            has_kind(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !has_kind(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = concat!(
            "::: {#tbl:panel layout.nrow=\"1\"}\n",
            "  : My Caption {#tbl:foo-1}\n",
            "\n",
            "  | Col1 | Col2 | Col3 |\n",
            "  | ---- | ---- | ---- |\n",
            "  | A    | B    | C    |\n",
            "  | E    | F    | G    |\n",
            "  | A    | G    | G    |\n",
            "\n",
            "  : My Caption2 {#tbl:foo-2}\n",
            "\n",
            "  | Col1 | Col2 | Col3 |\n",
            "  | ---- | ---- | ---- |\n",
            "  | A    | B    | C    |\n",
            "  | E    | F    | G    |\n",
            "  | A    | G    | G    |\n",
            "\n",
            "Caption\n",
            ":::\n",
        );
        let tree = crate::parse(input, None);

        let caption_count = tree
            .descendants()
            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
            .count();
        assert_eq!(
            caption_count, 2,
            "expected both captions to attach to tables"
        );

        let has_definition_list = tree
            .descendants()
            .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST);
        assert!(
            !has_definition_list,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];

        let consumed = try_parse_simple_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // header + sep + 2 rows + blank + caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let input = [
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // Parsing starts at the header line (index 2), after the caption.
        let consumed = try_parse_simple_table(
            &input,
            2,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // caption + blank + header + sep + 2 rows
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];

        let consumed = try_parse_simple_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];

        let consumed = try_parse_simple_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // Consumes through the end of the multi-line caption.
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let input = [
            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
            "--------------  ------------ ------- ---------------- ----------------- ------------",
            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
            "",
        ];

        let consumed = try_parse_simple_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        assert_eq!(consumed, Some(4));
    }

    // ---- Pipe tables ---------------------------------------------------

    #[test]
    fn test_pipe_separator_detection() {
        for separator in [
            "|------:|:-----|---------|:------:|",
            "|---|---|",
            "-----|-----:",     // no leading pipe
            "|-----+-------|", // orgtbl variant
        ] {
            assert!(try_parse_pipe_separator(separator).is_some());
        }
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(
            aligns,
            vec![
                Alignment::Right,
                Alignment::Left,
                Alignment::Default,
                Alignment::Center,
            ]
        );
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let with_edges = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(with_edges, ["Right", "Left", "Center"]);

        // Leading/trailing pipes are optional.
        let without_edges = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(without_edges.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let input = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        let consumed = try_parse_pipe_table(
            &input,
            1,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let input = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        let consumed = try_parse_pipe_table(
            &input,
            1,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let input = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        let consumed = try_parse_pipe_table(
            &input,
            1,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let input = [
            ": (#tab:base) base R quoting",
            "functions",
            "",
            "| C | D |",
            "|---|---|",
            "| 3 | 4 |",
            "",
        ];

        let consumed = try_parse_pipe_table(
            &input,
            0,
            &mut GreenNodeBuilder::new(),
            &ParserOptions::default(),
        );

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(consumed, Some(6));
    }
}
1546
1547// ============================================================================
1548// Grid Table Parsing
1549// ============================================================================
1550
1551/// Check if a line is a grid table row separator (starts with +, contains -, ends with +).
1552/// Returns Some(vec of column info) if valid, None otherwise.
1553fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
1554    let trimmed = line.trim_start();
1555    let leading_spaces = line.len() - trimmed.len();
1556
1557    // Must have leading spaces <= 3 to not be a code block
1558    if leading_spaces > 3 {
1559        return None;
1560    }
1561
1562    // Must start with + and end with +
1563    if !trimmed.starts_with('+') || !trimmed.trim_end().ends_with('+') {
1564        return None;
1565    }
1566
1567    // Split by + to get column segments
1568    let trimmed = trimmed.trim_end();
1569    let segments: Vec<&str> = trimmed.split('+').collect();
1570
1571    // Need at least 3 parts: empty before first +, column(s), empty after last +
1572    if segments.len() < 3 {
1573        return None;
1574    }
1575
1576    let mut columns = Vec::new();
1577
1578    // Parse each segment between + signs
1579    for segment in segments.iter().skip(1).take(segments.len() - 2) {
1580        if segment.is_empty() {
1581            continue;
1582        }
1583
1584        // Segment must be dashes/equals with optional colons for alignment
1585        let seg_trimmed = *segment;
1586
1587        // Get the fill character (after removing colons)
1588        let inner = seg_trimmed.trim_start_matches(':').trim_end_matches(':');
1589
1590        // Must be all dashes or all equals
1591        if inner.is_empty() {
1592            return None;
1593        }
1594
1595        let first_char = inner.chars().next().unwrap();
1596        if first_char != '-' && first_char != '=' {
1597            return None;
1598        }
1599
1600        if !inner.chars().all(|c| c == first_char) {
1601            return None;
1602        }
1603
1604        let is_header_sep = first_char == '=';
1605
1606        columns.push(GridColumn {
1607            is_header_separator: is_header_sep,
1608            width: seg_trimmed.chars().count(),
1609        });
1610    }
1611
1612    if columns.is_empty() {
1613        None
1614    } else {
1615        Some(columns)
1616    }
1617}
1618
/// One column of a grid table, as described by a single segment of a
/// `+---+===+` border line.
#[derive(Clone, Debug)]
struct GridColumn {
    /// `true` when the border segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the border segment between two `+` signs,
    /// alignment colons included.
    width: usize,
}
1625
1626fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1627    let mut end_byte = start_byte;
1628    let mut display_cols = 0usize;
1629
1630    for (offset, ch) in line[start_byte..].char_indices() {
1631        if ch == '|' {
1632            let sep_byte = start_byte + offset;
1633            return (sep_byte, sep_byte + 1);
1634        }
1635        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1636        if display_cols + ch_width > width {
1637            break;
1638        }
1639        display_cols += ch_width;
1640        end_byte = start_byte + offset + ch.len_utf8();
1641        if display_cols >= width {
1642            break;
1643        }
1644    }
1645
1646    // If the width budget is exhausted before seeing a separator (for example
1647    // because of padding/layout drift), advance to the next literal separator
1648    // to keep row slicing aligned and preserve losslessness.
1649    let mut sep_byte = end_byte;
1650    while sep_byte < line.len() {
1651        let mut chars = line[sep_byte..].chars();
1652        let Some(ch) = chars.next() else {
1653            break;
1654        };
1655        if ch == '|' {
1656            return (sep_byte, sep_byte + 1);
1657        }
1658        sep_byte += ch.len_utf8();
1659    }
1660
1661    (end_byte, end_byte)
1662}
1663
/// Tell whether `line` looks like a content row of a grid table.
///
/// Ignoring surrounding whitespace, a content row starts with `|` and ends
/// with either `|` (ordinary row) or `+` (spanning-style continuation line).
/// Lines indented by more than three characters are rejected.
fn is_grid_content_row(line: &str) -> bool {
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();
    if indent > 3 {
        return false;
    }

    let row = unindented.trim_end();
    let closes_properly = matches!(row.chars().next_back(), Some('|') | Some('+'));
    row.starts_with('|') && closes_properly
}
1677
1678/// Extract cell contents from a single grid table row line.
1679/// Returns a vector of cell contents (trimmed) based on column boundaries.
1680/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1681fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1682    let (line_content, _) = strip_newline(line);
1683    let line_trimmed = line_content.trim();
1684
1685    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1686        return vec![String::new(); _columns.len()];
1687    }
1688
1689    let mut cells = Vec::with_capacity(_columns.len());
1690    let mut pos_byte = 1; // Skip leading pipe
1691
1692    for col in _columns {
1693        let col_idx = cells.len();
1694        if pos_byte >= line_trimmed.len() {
1695            cells.push(String::new());
1696            continue;
1697        }
1698
1699        let start_byte = pos_byte;
1700        let end_byte = if col_idx + 1 == _columns.len() {
1701            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1702        } else {
1703            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1704            pos_byte = next_start;
1705            end
1706        };
1707        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1708        if col_idx + 1 == _columns.len() {
1709            pos_byte = line_trimmed.len();
1710        }
1711    }
1712
1713    cells
1714}
1715
1716/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1717/// Concatenates cell contents across lines with newlines, then trims.
1718fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1719    if lines.is_empty() {
1720        return vec![String::new(); columns.len()];
1721    }
1722
1723    extract_grid_cells_from_line(lines[0], columns)
1724}
1725
1726/// Emit a grid table row with inline-parsed cells.
1727/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1728/// then continuation lines as raw TEXT for losslessness.
1729fn emit_grid_table_row(
1730    builder: &mut GreenNodeBuilder<'static>,
1731    lines: &[&str],
1732    columns: &[GridColumn],
1733    row_kind: SyntaxKind,
1734    config: &ParserOptions,
1735) {
1736    if lines.is_empty() {
1737        return;
1738    }
1739
1740    // Extract cell contents from the first line.
1741    let cell_contents = extract_grid_cells_multiline(lines, columns);
1742
1743    builder.start_node(row_kind.into());
1744
1745    // Emit first line with TABLE_CELL nodes
1746    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1747    let first_line = lines[0];
1748    let (line_without_newline, newline_str) = strip_newline(first_line);
1749    let trimmed = line_without_newline.trim();
1750    let expected_pipe_count = columns.len().saturating_add(1);
1751    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1752
1753    // Rows that don't contain all expected column separators (spanning-style rows)
1754    // must be emitted verbatim for losslessness.
1755    if actual_pipe_count != expected_pipe_count {
1756        emit_line_tokens(builder, first_line);
1757        for line in lines.iter().skip(1) {
1758            emit_line_tokens(builder, line);
1759        }
1760        builder.finish_node();
1761        return;
1762    }
1763
1764    // Emit leading whitespace
1765    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1766    if leading_ws_len > 0 {
1767        builder.token(
1768            SyntaxKind::WHITESPACE.into(),
1769            &line_without_newline[..leading_ws_len],
1770        );
1771    }
1772
1773    // Emit leading pipe
1774    if trimmed.starts_with('|') {
1775        builder.token(SyntaxKind::TEXT.into(), "|");
1776    }
1777
1778    // Emit each cell based on fixed column widths from separators
1779    let mut pos_byte = 1usize; // after leading pipe
1780    for (idx, cell_content) in cell_contents.iter().enumerate() {
1781        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1782            let start_byte = pos_byte;
1783            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1784                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1785            } else {
1786                let (end, next_start) =
1787                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1788                pos_byte = next_start;
1789                end
1790            };
1791            let slice = &trimmed[start_byte..end_byte];
1792            if idx + 1 == columns.len() {
1793                pos_byte = trimmed.len();
1794            }
1795            slice
1796        } else {
1797            ""
1798        };
1799
1800        // Emit leading whitespace in cell
1801        let cell_trimmed = part.trim();
1802        let ws_start_len = part.len() - part.trim_start().len();
1803        if ws_start_len > 0 {
1804            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1805        }
1806
1807        // Emit TABLE_CELL with inline parsing
1808        emit_table_cell(builder, cell_content, config);
1809
1810        // Emit trailing whitespace in cell
1811        let ws_end_start = ws_start_len + cell_trimmed.len();
1812        if ws_end_start < part.len() {
1813            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1814        }
1815
1816        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1817        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1818            builder.token(SyntaxKind::TEXT.into(), "|");
1819        }
1820    }
1821
1822    // Emit trailing whitespace before newline
1823    let trailing_ws_start = leading_ws_len + trimmed.len();
1824    if trailing_ws_start < line_without_newline.len() {
1825        builder.token(
1826            SyntaxKind::WHITESPACE.into(),
1827            &line_without_newline[trailing_ws_start..],
1828        );
1829    }
1830
1831    // Emit newline
1832    if !newline_str.is_empty() {
1833        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1834    }
1835
1836    // Emit continuation lines as TEXT for losslessness
1837    for line in lines.iter().skip(1) {
1838        emit_line_tokens(builder, line);
1839    }
1840
1841    builder.finish_node();
1842}
1843
1844/// Try to parse a grid table starting at the given position.
1845/// Returns the number of lines consumed if successful.
1846pub(crate) fn try_parse_grid_table(
1847    lines: &[&str],
1848    start_pos: usize,
1849    builder: &mut GreenNodeBuilder<'static>,
1850    config: &ParserOptions,
1851) -> Option<usize> {
1852    if start_pos >= lines.len() {
1853        return None;
1854    }
1855
1856    // Check if this line is a caption followed by a table
1857    // If so, the actual table starts after the caption and blank line
1858    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1859        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1860        let mut pos = cap_end;
1861        while pos < lines.len() && lines[pos].trim().is_empty() {
1862            pos += 1;
1863        }
1864        (pos, Some((cap_start, cap_end)))
1865    } else {
1866        (start_pos, None)
1867    };
1868
1869    if actual_start >= lines.len() {
1870        return None;
1871    }
1872
1873    // First line must be a grid separator
1874    let first_line = lines[actual_start];
1875    let _columns = try_parse_grid_separator(first_line)?;
1876
1877    // Track table structure
1878    let mut end_pos = actual_start + 1;
1879    let mut found_header_sep = false;
1880    let mut in_footer = false;
1881
1882    // Scan table lines
1883    while end_pos < lines.len() {
1884        let line = lines[end_pos];
1885
1886        // Check for blank line (table ends)
1887        if line.trim().is_empty() {
1888            break;
1889        }
1890
1891        // Check for separator line
1892        if let Some(sep_cols) = try_parse_grid_separator(line) {
1893            // Check if this is a header separator (=)
1894            if sep_cols.iter().any(|c| c.is_header_separator) {
1895                if !found_header_sep {
1896                    found_header_sep = true;
1897                } else if !in_footer {
1898                    // Second = separator starts footer
1899                    in_footer = true;
1900                }
1901            }
1902            end_pos += 1;
1903            continue;
1904        }
1905
1906        // Check for content row
1907        if is_grid_content_row(line) {
1908            end_pos += 1;
1909            continue;
1910        }
1911
1912        // Not a valid grid table line - table ends
1913        break;
1914    }
1915
1916    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1917    // Or just top + content rows that end with a separator
1918    if end_pos <= actual_start + 1 {
1919        return None;
1920    }
1921
1922    // Last consumed line should be a separator for a well-formed table
1923    // But we'll be lenient and accept tables ending with content rows
1924
1925    // Check for caption before table (only if we didn't already detected it)
1926    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1927
1928    // Check for caption after table
1929    let caption_after = if caption_before.is_some() {
1930        None
1931    } else {
1932        find_caption_after_table(lines, end_pos)
1933    };
1934
1935    // Build the grid table
1936    builder.start_node(SyntaxKind::GRID_TABLE.into());
1937
1938    // Emit caption before if present
1939    if let Some((cap_start, cap_end)) = caption_before {
1940        emit_table_caption(builder, lines, cap_start, cap_end, config);
1941        // Emit blank line between caption and table if present
1942        if cap_end < actual_start {
1943            for line in lines.iter().take(actual_start).skip(cap_end) {
1944                if line.trim().is_empty() {
1945                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1946                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1947                    builder.finish_node();
1948                }
1949            }
1950        }
1951    }
1952
1953    // Track whether we've passed the header separator
1954    let mut past_header_sep = false;
1955    let mut in_footer_section = false;
1956    let mut current_row_lines: Vec<&str> = Vec::new();
1957    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1958
1959    // Emit table rows - accumulate multi-line cells
1960    for line in lines.iter().take(end_pos).skip(actual_start) {
1961        if let Some(sep_cols) = try_parse_grid_separator(line) {
1962            // Separator line - emit any accumulated row first
1963            if !current_row_lines.is_empty() {
1964                emit_grid_table_row(
1965                    builder,
1966                    &current_row_lines,
1967                    &sep_cols,
1968                    current_row_kind,
1969                    config,
1970                );
1971                current_row_lines.clear();
1972            }
1973
1974            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1975
1976            if is_header_sep {
1977                if !past_header_sep {
1978                    // This is the header/body separator
1979                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1980                    emit_line_tokens(builder, line);
1981                    builder.finish_node();
1982                    past_header_sep = true;
1983                } else {
1984                    // Footer separator
1985                    if !in_footer_section {
1986                        in_footer_section = true;
1987                    }
1988                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1989                    emit_line_tokens(builder, line);
1990                    builder.finish_node();
1991                }
1992            } else {
1993                // Regular separator (row boundary)
1994                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1995                emit_line_tokens(builder, line);
1996                builder.finish_node();
1997            }
1998        } else if is_grid_content_row(line) {
1999            // Content row - accumulate for multi-line cells
2000            current_row_kind = if !past_header_sep && found_header_sep {
2001                SyntaxKind::TABLE_HEADER
2002            } else if in_footer_section {
2003                SyntaxKind::TABLE_FOOTER
2004            } else {
2005                SyntaxKind::TABLE_ROW
2006            };
2007
2008            current_row_lines.push(line);
2009        }
2010    }
2011
2012    // Emit any remaining accumulated row
2013    if !current_row_lines.is_empty() {
2014        // Use first separator's columns for cell boundaries
2015        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
2016            emit_grid_table_row(
2017                builder,
2018                &current_row_lines,
2019                &sep_cols,
2020                current_row_kind,
2021                config,
2022            );
2023        }
2024    }
2025
2026    // Emit caption after if present
2027    if let Some((cap_start, cap_end)) = caption_after {
2028        if cap_start > end_pos {
2029            for line in lines.iter().take(cap_start).skip(end_pos) {
2030                if line.trim().is_empty() {
2031                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2032                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2033                    builder.finish_node();
2034                }
2035            }
2036        }
2037        emit_table_caption(builder, lines, cap_start, cap_end, config);
2038    }
2039
2040    builder.finish_node(); // GRID_TABLE
2041
2042    // Calculate lines consumed
2043    let table_start = caption_before
2044        .map(|(start, _)| start)
2045        .unwrap_or(actual_start);
2046    let table_end = if let Some((_, cap_end)) = caption_after {
2047        cap_end
2048    } else {
2049        end_pos
2050    };
2051
2052    Some(table_end - table_start)
2053}
2054
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Run the grid table parser on `input` starting at line `start` and
    /// return the number of consumed lines; the built tree is discarded.
    fn consumed_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let separators = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in separators {
            assert!(try_parse_grid_separator(line).is_some());
        }

        let non_separators = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in non_separators {
            assert!(try_parse_grid_separator(line).is_none());
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|c| c.is_header_separator));

        let plain_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(plain_cols.iter().all(|c| !c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for line in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(line));
        }

        // A separator is not content, and neither is a line without pipes.
        for line in ["+---+---+", "no pipes here"] {
            assert!(!is_grid_content_row(line));
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts at the table itself; the count should still include
        // caption + blank + table.
        assert_eq!(consumed_lines(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(&input, 0), Some(7));
    }
}
2209
2210// ============================================================================
2211// Multiline Table Parsing
2212// ============================================================================
2213
2214/// Check if a line is a multiline table separator (continuous dashes).
2215/// Multiline table separators span the full width and are all dashes.
2216/// Returns Some(columns) if valid, None otherwise.
2217fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2218    let trimmed = line.trim_start();
2219    let leading_spaces = line.len() - trimmed.len();
2220
2221    // Must have leading spaces <= 3 to not be a code block
2222    if leading_spaces > 3 {
2223        return None;
2224    }
2225
2226    let trimmed = trimmed.trim_end();
2227
2228    // Must be all dashes (continuous line of dashes)
2229    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2230        return None;
2231    }
2232
2233    // Must have at least 3 dashes
2234    if trimmed.len() < 3 {
2235        return None;
2236    }
2237
2238    // This is a full-width separator - columns will be determined by column separator lines
2239    Some(vec![Column {
2240        start: leading_spaces,
2241        end: leading_spaces + trimmed.len(),
2242        alignment: Alignment::Default,
2243    }])
2244}
2245
2246/// Check if a line is a column separator line for multiline tables.
2247/// Column separators have dashes with spaces between them to define columns.
2248fn is_column_separator(line: &str) -> bool {
2249    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2250}
2251
2252fn is_headerless_single_row_without_blank(
2253    lines: &[&str],
2254    row_start: usize,
2255    row_end: usize,
2256    columns: &[Column],
2257) -> bool {
2258    if row_start >= row_end {
2259        return false;
2260    }
2261
2262    if row_end - row_start == 1 {
2263        return false;
2264    }
2265
2266    let Some(last_col) = columns.last() else {
2267        return false;
2268    };
2269
2270    for line in lines.iter().take(row_end).skip(row_start + 1) {
2271        let (content, _) = strip_newline(line);
2272        let prefix_end = last_col.start.min(content.len());
2273        if !content[..prefix_end].trim().is_empty() {
2274            return false;
2275        }
2276    }
2277
2278    true
2279}
2280
2281/// Try to parse a multiline table starting at the given position.
2282/// Returns the number of lines consumed if successful.
2283pub(crate) fn try_parse_multiline_table(
2284    lines: &[&str],
2285    start_pos: usize,
2286    builder: &mut GreenNodeBuilder<'static>,
2287    config: &ParserOptions,
2288) -> Option<usize> {
2289    if start_pos >= lines.len() {
2290        return None;
2291    }
2292
2293    let first_line = lines[start_pos];
2294
2295    // First line can be either:
2296    // 1. A full-width dash separator (for tables with headers)
2297    // 2. A column separator (for headerless tables)
2298    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2299    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2300    let headerless_columns = if is_column_sep_start {
2301        try_parse_table_separator(first_line)
2302    } else {
2303        None
2304    };
2305
2306    if !is_full_width_start && !is_column_sep_start {
2307        return None;
2308    }
2309
2310    // Look ahead to find the structure
2311    let mut pos = start_pos + 1;
2312    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2313    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2314    let mut has_header = false;
2315    let mut found_blank_line = false;
2316    let mut found_closing_sep = false;
2317    let mut content_line_count = 0usize;
2318
2319    // Scan for header section and column separator
2320    while pos < lines.len() {
2321        let line = lines[pos];
2322
2323        // Check for column separator (defines columns) - only if we started with full-width
2324        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2325            found_column_sep = true;
2326            column_sep_pos = pos;
2327            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2328            pos += 1;
2329            continue;
2330        }
2331
2332        // Check for blank line (row separator in body)
2333        if line.trim().is_empty() {
2334            found_blank_line = true;
2335            pos += 1;
2336            // Check if next line is a valid closing separator for this table shape.
2337            if pos < lines.len() {
2338                let next = lines[pos];
2339                let is_valid_closer = if is_full_width_start {
2340                    try_parse_multiline_separator(next).is_some()
2341                } else {
2342                    is_column_separator(next)
2343                };
2344                if is_valid_closer {
2345                    found_closing_sep = true;
2346                    pos += 1; // Include the closing separator
2347                    break;
2348                }
2349            }
2350            continue;
2351        }
2352
2353        // Check for closing full-width dashes (only for full-width-start tables).
2354        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2355            found_closing_sep = true;
2356            pos += 1;
2357            break;
2358        }
2359
2360        // Check for closing column separator (for headerless tables)
2361        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2362            found_closing_sep = true;
2363            pos += 1;
2364            break;
2365        }
2366
2367        // Content row
2368        content_line_count += 1;
2369        pos += 1;
2370    }
2371
2372    // Must have found a column separator to be a valid multiline table
2373    if !found_column_sep {
2374        return None;
2375    }
2376
2377    // Must have had at least one blank line between rows (distinguishes from simple tables)
2378    if !found_blank_line {
2379        if !is_column_sep_start {
2380            return None;
2381        }
2382        let columns = headerless_columns.as_deref()?;
2383        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2384            return None;
2385        }
2386    }
2387
2388    // Must have a closing separator
2389    if !found_closing_sep {
2390        return None;
2391    }
2392
2393    // Must have consumed more than just the opening separator
2394    if pos <= start_pos + 2 {
2395        return None;
2396    }
2397
2398    let end_pos = pos;
2399
2400    // Extract column boundaries from the separator line
2401    let columns =
2402        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2403
2404    // Check for caption before table
2405    let caption_before = find_caption_before_table(lines, start_pos);
2406
2407    // Check for caption after table
2408    let caption_after = if caption_before.is_some() {
2409        None
2410    } else {
2411        find_caption_after_table(lines, end_pos)
2412    };
2413
2414    // Build the multiline table
2415    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2416
2417    // Emit caption before if present
2418    if let Some((cap_start, cap_end)) = caption_before {
2419        emit_table_caption(builder, lines, cap_start, cap_end, config);
2420
2421        // Emit blank line between caption and table if present
2422        if cap_end < start_pos {
2423            for line in lines.iter().take(start_pos).skip(cap_end) {
2424                if line.trim().is_empty() {
2425                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2426                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2427                    builder.finish_node();
2428                }
2429            }
2430        }
2431    }
2432
2433    // Emit opening separator
2434    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2435    emit_line_tokens(builder, lines[start_pos]);
2436    builder.finish_node();
2437
2438    // Track state for emitting
2439    let mut in_header = has_header;
2440    let mut current_row_lines: Vec<&str> = Vec::new();
2441
2442    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2443        // Column separator (header/body divider)
2444        if i == column_sep_pos {
2445            // Emit any accumulated header lines
2446            if !current_row_lines.is_empty() {
2447                emit_multiline_table_row(
2448                    builder,
2449                    &current_row_lines,
2450                    &columns,
2451                    SyntaxKind::TABLE_HEADER,
2452                    config,
2453                );
2454                current_row_lines.clear();
2455            }
2456
2457            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2458            emit_line_tokens(builder, line);
2459            builder.finish_node();
2460            in_header = false;
2461            continue;
2462        }
2463
2464        // Closing separator (full-width or column separator at end)
2465        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2466            // Emit any accumulated row lines
2467            if !current_row_lines.is_empty() {
2468                let kind = if in_header {
2469                    SyntaxKind::TABLE_HEADER
2470                } else {
2471                    SyntaxKind::TABLE_ROW
2472                };
2473                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2474                current_row_lines.clear();
2475            }
2476
2477            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2478            emit_line_tokens(builder, line);
2479            builder.finish_node();
2480            continue;
2481        }
2482
2483        // Blank line (row separator)
2484        if line.trim().is_empty() {
2485            // Emit accumulated row
2486            if !current_row_lines.is_empty() {
2487                let kind = if in_header {
2488                    SyntaxKind::TABLE_HEADER
2489                } else {
2490                    SyntaxKind::TABLE_ROW
2491                };
2492                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2493                current_row_lines.clear();
2494            }
2495
2496            builder.start_node(SyntaxKind::BLANK_LINE.into());
2497            builder.token(SyntaxKind::BLANK_LINE.into(), line);
2498            builder.finish_node();
2499            continue;
2500        }
2501
2502        // Content line - accumulate for current row
2503        current_row_lines.push(line);
2504    }
2505
2506    // Emit any remaining accumulated lines
2507    if !current_row_lines.is_empty() {
2508        let kind = if in_header {
2509            SyntaxKind::TABLE_HEADER
2510        } else {
2511            SyntaxKind::TABLE_ROW
2512        };
2513        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2514    }
2515
2516    // Emit caption after if present
2517    if let Some((cap_start, cap_end)) = caption_after {
2518        if cap_start > end_pos {
2519            for line in lines.iter().take(cap_start).skip(end_pos) {
2520                if line.trim().is_empty() {
2521                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2522                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2523                    builder.finish_node();
2524                }
2525            }
2526        }
2527        emit_table_caption(builder, lines, cap_start, cap_end, config);
2528    }
2529
2530    builder.finish_node(); // MultilineTable
2531
2532    // Calculate lines consumed
2533    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2534    let table_end = if let Some((_, cap_end)) = caption_after {
2535        cap_end
2536    } else {
2537        end_pos
2538    };
2539
2540    Some(table_end - table_start)
2541}
2542
2543/// Extract cell contents from first line only (for CST emission).
2544/// Multi-line content will be in continuation TEXT tokens.
2545fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2546    let (line_content, _) = strip_newline(line);
2547    let mut cells = Vec::new();
2548
2549    for column in columns.iter() {
2550        let column_start = column_offset_to_byte_index(line_content, column.start);
2551        let column_end = column_offset_to_byte_index(line_content, column.end);
2552
2553        // Extract FULL text for this column (including whitespace)
2554        let cell_text = if column_start < column_end {
2555            &line_content[column_start..column_end]
2556        } else if column_start < line_content.len() {
2557            &line_content[column_start..]
2558        } else {
2559            ""
2560        };
2561
2562        cells.push(cell_text.to_string());
2563    }
2564
2565    cells
2566}
2567
2568/// Emit a multiline table row with inline parsing (Phase 7.1).
2569fn emit_multiline_table_row(
2570    builder: &mut GreenNodeBuilder<'static>,
2571    lines: &[&str],
2572    columns: &[Column],
2573    kind: SyntaxKind,
2574    config: &ParserOptions,
2575) {
2576    if lines.is_empty() {
2577        return;
2578    }
2579
2580    // Extract cell contents from first line only (for CST losslessness)
2581    let first_line = lines[0];
2582    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2583
2584    builder.start_node(kind.into());
2585
2586    // Emit first line with TABLE_CELL nodes
2587    let (trimmed, newline_str) = strip_newline(first_line);
2588    let mut current_pos = 0;
2589
2590    for (col_idx, column) in columns.iter().enumerate() {
2591        let cell_text = &cell_contents[col_idx];
2592        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2593        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2594
2595        // Emit whitespace before cell
2596        if current_pos < cell_start {
2597            builder.token(
2598                SyntaxKind::WHITESPACE.into(),
2599                &trimmed[current_pos..cell_start],
2600            );
2601        }
2602
2603        // Emit cell with inline parsing (first line content only)
2604        emit_table_cell(builder, cell_text, config);
2605
2606        current_pos = cell_end;
2607    }
2608
2609    // Emit trailing whitespace
2610    if current_pos < trimmed.len() {
2611        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2612    }
2613
2614    // Emit newline
2615    if !newline_str.is_empty() {
2616        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2617    }
2618
2619    // Emit continuation lines as TEXT to preserve exact line structure
2620    for line in lines.iter().skip(1) {
2621        emit_line_tokens(builder, line);
2622    }
2623
2624    builder.finish_node();
2625}
2626
#[cfg(test)]
mod multiline_table_tests {
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Emits a single table cell for `text` using default parser options and
    /// returns the root node of the finished tree.
    fn cell_root(text: &str) -> SyntaxNode {
        let mut tree = GreenNodeBuilder::new();
        emit_table_cell(&mut tree, text, &ParserOptions::default());
        SyntaxNode::new_root(tree.finish())
    }

    /// Lines made only of dashes (three or more, optionally indented) are
    /// accepted; everything else in the list below is rejected.
    #[test]
    fn test_multiline_separator_detection() {
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----", // with leading spaces
        ];
        for line in accepted {
            assert!(try_parse_multiline_separator(line).is_some());
        }

        let rejected = [
            "--",      // too short
            "--- ---", // has spaces
            "+---+",   // grid separator
        ];
        for line in rejected {
            assert!(try_parse_multiline_separator(line).is_none());
        }
    }

    /// Full table: opening rule, two header lines, column separator, two rows
    /// (the first spanning two lines) and a closing rule.
    #[test]
    fn test_basic_multiline_table() {
        let lines = vec![
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        let outcome = try_parse_multiline_table(&lines, 0, &mut tree, &options);

        assert_eq!(outcome, Some(9));
    }

    /// A table without a header starts and ends with the column separator.
    #[test]
    fn test_multiline_table_headerless() {
        let lines = vec![
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        let outcome = try_parse_multiline_table(&lines, 0, &mut tree, &options);

        assert_eq!(outcome, Some(6));
    }

    /// A headerless candidate whose only row occupies a single line must be
    /// rejected by the multiline parser.
    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        let lines = vec![
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        assert!(try_parse_multiline_table(&lines, 0, &mut tree, &options).is_none());
    }

    /// A single headerless row that continues onto a second line is accepted
    /// even though no blank line precedes the closing separator.
    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        let lines = vec![
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        let outcome = try_parse_multiline_table(&lines, 0, &mut tree, &options);

        assert_eq!(outcome, Some(4));
    }

    /// A `Table:` caption separated from the table by one blank line is
    /// included in the reported extent.
    #[test]
    fn test_multiline_table_with_caption() {
        let lines = vec![
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        let outcome = try_parse_multiline_table(&lines, 0, &mut tree, &options);

        // table (6 lines) + blank + caption
        assert_eq!(outcome, Some(8));
    }

    /// A table with a header and exactly one body row.
    #[test]
    fn test_multiline_table_single_row() {
        let lines = vec![
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        let outcome = try_parse_multiline_table(&lines, 0, &mut tree, &options);

        assert_eq!(outcome, Some(6));
    }

    /// Ordinary prose that starts with a spaced-dash rule and later contains a
    /// plain `---` rule must not be taken for a headerless multiline table.
    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        let lines = vec![
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        assert!(try_parse_multiline_table(&lines, 0, &mut tree, &options).is_none());
    }

    /// A simple table (header text first, dashes second) is not a multiline
    /// table.
    #[test]
    fn test_not_multiline_table() {
        let lines = vec![
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ];
        let options = ParserOptions::default();
        let mut tree = GreenNodeBuilder::new();

        // Rejected because the first line isn't a full-width separator.
        assert!(try_parse_multiline_table(&lines, 0, &mut tree, &options).is_none());
    }

    // Phase 7.1: Unit tests for emit_table_cell() helper

    /// Plain text yields a TABLE_CELL whose only child is a TEXT token.
    #[test]
    fn test_emit_table_cell_plain_text() {
        let root = cell_root("Cell");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), "Cell");

        let kinds: Vec<_> = root.children_with_tokens().map(|el| el.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::TEXT]);
    }

    /// `*italic*` yields exactly one child node, of kind EMPHASIS.
    #[test]
    fn test_emit_table_cell_with_emphasis() {
        let root = cell_root("*italic*");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), "*italic*");

        let kinds: Vec<_> = root.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::EMPHASIS]);
    }

    /// A backtick span yields exactly one child node, of kind INLINE_CODE.
    #[test]
    fn test_emit_table_cell_with_code() {
        let root = cell_root("`code`");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), "`code`");

        let kinds: Vec<_> = root.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::INLINE_CODE]);
    }

    /// An inline link yields exactly one child node, of kind LINK.
    #[test]
    fn test_emit_table_cell_with_link() {
        let root = cell_root("[text](url)");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), "[text](url)");

        let kinds: Vec<_> = root.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::LINK]);
    }

    /// `**bold**` yields exactly one child node, of kind STRONG.
    #[test]
    fn test_emit_table_cell_with_strong() {
        let root = cell_root("**bold**");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), "**bold**");

        let kinds: Vec<_> = root.children().map(|child| child.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::STRONG]);
    }

    /// Mixed content: at least four children are required, and only the first
    /// two kinds (TEXT, then STRONG) are pinned. The remaining children are
    /// expected to cover the trailing text and the inline code span.
    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let source = "Text **bold** and `code`";
        let root = cell_root(source);

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), source);

        let kinds: Vec<_> = root.children_with_tokens().map(|el| el.kind()).collect();
        assert!(kinds.len() >= 4);
        assert_eq!(kinds[0], SyntaxKind::TEXT);
        assert_eq!(kinds[1], SyntaxKind::STRONG);
    }

    /// An empty string still produces a TABLE_CELL node, with no children.
    #[test]
    fn test_emit_table_cell_empty() {
        let root = cell_root("");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), "");
        assert_eq!(root.children_with_tokens().count(), 0);
    }

    /// The backslash-escaped pipe must survive in the cell's text unchanged.
    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let root = cell_root(r"A \| B");

        assert_eq!(root.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(root.text(), r"A \| B");
    }
}