Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
9use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
10use crate::parser::utils::inline_emission;
11
/// Horizontal alignment assigned to a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Content is aligned to the left edge of the column.
    Left,
    /// Content is aligned to the right edge of the column.
    Right,
    /// Content is centered within the column.
    Center,
    /// No explicit alignment was expressed by the table syntax.
    Default,
}
19
20/// Column information extracted from the separator line.
21#[derive(Debug, Clone)]
22pub(crate) struct Column {
23    /// Start position (byte index) in the line
24    start: usize,
25    /// End position (byte index) in the line
26    end: usize,
27    /// Column alignment
28    alignment: Alignment,
29}
30
31/// Try to detect if a line is a table separator line.
32/// Returns Some(column positions) if it's a valid separator.
33pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
34    let trimmed = line.trim_start();
35    // Strip trailing newline if present (CRLF or LF)
36    let (trimmed, newline_str) = strip_newline(trimmed);
37    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
38
39    // Must have leading spaces <= 3 to not be a code block
40    if leading_spaces > 3 {
41        return None;
42    }
43
44    // Simple tables only use dashed separators.
45    if trimmed.contains('*') || trimmed.contains('_') {
46        return None;
47    }
48
49    // Must contain at least one dash
50    if !trimmed.contains('-') {
51        return None;
52    }
53
54    // A separator line consists of dashes and spaces
55    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
56        return None;
57    }
58
59    // Must not be a horizontal rule.
60    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
61    if dash_groups.len() <= 1 {
62        return None;
63    }
64
65    // Extract column positions from dash groups
66    let columns = extract_columns(trimmed, leading_spaces);
67
68    if columns.is_empty() {
69        return None;
70    }
71
72    Some(columns)
73}
74
75/// Extract column positions from a separator line.
76fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
77    let mut columns = Vec::new();
78    let mut in_dashes = false;
79    let mut col_start = 0;
80
81    for (i, ch) in separator.char_indices() {
82        match ch {
83            '-' if !in_dashes => {
84                col_start = i + offset;
85                in_dashes = true;
86            }
87            ' ' if in_dashes => {
88                columns.push(Column {
89                    start: col_start,
90                    end: i + offset,
91                    alignment: Alignment::Default, // Will be determined later
92                });
93                in_dashes = false;
94            }
95            _ => {}
96        }
97    }
98
99    // Handle last column
100    if in_dashes {
101        columns.push(Column {
102            start: col_start,
103            end: separator.len() + offset,
104            alignment: Alignment::Default,
105        });
106    }
107
108    columns
109}
110
/// Map a character offset within `line` to the corresponding UTF-8 byte index.
///
/// Column boundaries are taken from ASCII separator lines, where character
/// and byte offsets coincide. Header and data rows may contain multibyte
/// characters, so offsets have to be remapped before slicing. Offsets at or
/// past the end of `line` map to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
121
/// Recognise a table caption marker at the start of `line`.
///
/// Accepted markers are `Table:`, `table:`, and a bare `:` that is followed
/// by whitespace. Returns `(prefix_len, rest)`, where `prefix_len` counts the
/// indentation plus the marker and `rest` is the text after the marker.
/// Lines indented by more than three characters are rejected.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation would make this an indented code block.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows it, so that constructs
    // such as fenced-div fences (":::") are not mistaken for captions.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
150
151/// Check if a line could be the start of a table caption.
152fn is_table_caption_start(line: &str) -> bool {
153    try_parse_caption_prefix(line).is_some()
154}
155
/// Whether `line`, ignoring indentation, starts with exactly one colon
/// (i.e. `:` not immediately followed by another `:`).
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
160
161fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
162    let Some((_, rest)) = try_parse_caption_prefix(line) else {
163        return false;
164    };
165    let trimmed = rest.trim_start();
166    trimmed.starts_with("```") || trimmed.starts_with("~~~")
167}
168
/// Whether `line` is a fenced-div fence: optional indentation, at least
/// three colons, then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    let colon_count = body.len() - after_colons.len();

    colon_count >= 3
        && (after_colons.is_empty() || after_colons.starts_with(char::is_whitespace))
}
178
179fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
180    if !is_table_caption_start(lines[pos]) {
181        return false;
182    }
183
184    if is_bare_colon_caption_start(lines[pos])
185        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
186    {
187        return false;
188    }
189
190    // Avoid stealing definition-list definitions (":   ...") as table captions.
191    if is_bare_colon_caption_start(lines[pos])
192        && pos > 0
193        && !lines[pos - 1].trim().is_empty()
194        && !line_is_fenced_div_fence(lines[pos - 1])
195    {
196        return false;
197    }
198    true
199}
200
201/// Check if a line could be the start of a grid table.
202/// Grid tables start with a separator line like +---+---+ or +===+===+
203fn is_grid_table_start(line: &str) -> bool {
204    try_parse_grid_separator(line).is_some()
205}
206
207/// Check if a line could be the start of a multiline table.
208/// Multiline tables start with either:
209/// - A full-width dash separator (----)
210/// - A column separator with dashes and spaces (---- ---- ----)
211fn is_multiline_table_start(line: &str) -> bool {
212    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
213}
214
215/// Check if there's a table following a potential caption at this position.
216/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
217pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
218    if caption_pos >= lines.len() {
219        return false;
220    }
221
222    // Caption must start with a caption prefix
223    if !is_valid_caption_start_before_table(lines, caption_pos) {
224        return false;
225    }
226
227    let mut pos = caption_pos + 1;
228
229    // Skip continuation lines of caption (non-blank lines)
230    while pos < lines.len() && !lines[pos].trim().is_empty() {
231        // If we hit a table separator, we found a table
232        if try_parse_table_separator(lines[pos]).is_some() {
233            return true;
234        }
235        pos += 1;
236    }
237
238    // Skip one blank line
239    if pos < lines.len() && lines[pos].trim().is_empty() {
240        pos += 1;
241    }
242
243    // Check for table at next position
244    if pos < lines.len() {
245        let line = lines[pos];
246
247        // Check for grid table start (+---+---+ or +===+===+)
248        if is_grid_table_start(line) {
249            return true;
250        }
251
252        // Check for multiline table start (---- or ---- ---- ----)
253        if is_multiline_table_start(line) {
254            return true;
255        }
256
257        // Could be a separator line (simple/pipe table, headerless)
258        if try_parse_table_separator(line).is_some() {
259            return true;
260        }
261
262        // Or could be a header line followed by separator (simple/pipe table with header)
263        if pos + 1 < lines.len() && !line.trim().is_empty() {
264            let next_line = lines[pos + 1];
265            if try_parse_table_separator(next_line).is_some()
266                || try_parse_pipe_separator(next_line).is_some()
267            {
268                return true;
269            }
270        }
271    }
272
273    false
274}
275
276fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
277    if start >= lines.len() || !is_table_caption_start(lines[start]) {
278        return None;
279    }
280    let mut end = start + 1;
281    while end < lines.len() && !lines[end].trim().is_empty() {
282        end += 1;
283    }
284    Some((start, end))
285}
286
287/// Find caption before table (if any).
288/// Returns (caption_start, caption_end) positions, or None.
289fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
290    if table_start == 0 {
291        return None;
292    }
293
294    // Look backward for a caption
295    // Caption must be immediately before table (with possible blank line between)
296    let mut pos = table_start - 1;
297
298    // Skip one blank line if present
299    if lines[pos].trim().is_empty() {
300        if pos == 0 {
301            return None;
302        }
303        pos -= 1;
304    }
305
306    // Now pos points to the last non-blank line before the table
307    // This could be the last line of a multiline caption, or a single-line caption
308    let caption_end = pos + 1; // End is exclusive
309
310    // If this line is NOT a caption start, it might be a continuation line
311    // Scan backward through non-blank lines to find the caption start
312    if !is_valid_caption_start_before_table(lines, pos) {
313        // Not a caption start - check if there's a caption start above
314        let mut scan_pos = pos;
315        while scan_pos > 0 {
316            scan_pos -= 1;
317            let line = lines[scan_pos];
318
319            // If we hit a blank line, we've gone too far
320            if line.trim().is_empty() {
321                return None;
322            }
323
324            // If we find a caption start, this is the beginning of the multiline caption
325            if is_valid_caption_start_before_table(lines, scan_pos) {
326                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
327                    return None;
328                }
329                if previous_nonblank_looks_like_table(lines, scan_pos) {
330                    return None;
331                }
332                return Some((scan_pos, caption_end));
333            }
334        }
335        // Scanned to beginning without finding caption start
336        None
337    } else {
338        if pos > 0 && !lines[pos - 1].trim().is_empty() {
339            return None;
340        }
341        if previous_nonblank_looks_like_table(lines, pos) {
342            return None;
343        }
344        // This line is a caption start - return the range
345        Some((pos, caption_end))
346    }
347}
348
349fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
350    if pos == 0 {
351        return false;
352    }
353    let mut i = pos;
354    while i > 0 {
355        i -= 1;
356        let line = lines[i].trim();
357        if line.is_empty() {
358            continue;
359        }
360        return line_looks_like_table_syntax(line);
361    }
362    false
363}
364
365fn line_looks_like_table_syntax(line: &str) -> bool {
366    if line.starts_with('|') && line.matches('|').count() >= 2 {
367        return true;
368    }
369    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
370        return true;
371    }
372    try_parse_table_separator(line).is_some()
373        || try_parse_pipe_separator(line).is_some()
374        || try_parse_grid_separator(line).is_some()
375}
376
377/// Find caption after table (if any).
378/// Returns (caption_start, caption_end) positions, or None.
379fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
380    if table_end >= lines.len() {
381        return None;
382    }
383
384    let mut pos = table_end;
385
386    // Skip one blank line if present
387    if pos < lines.len() && lines[pos].trim().is_empty() {
388        pos += 1;
389    }
390
391    if pos >= lines.len() {
392        return None;
393    }
394
395    // Check if this line is a caption
396    if is_table_caption_start(lines[pos]) {
397        let caption_start = pos;
398        // Find end of caption (continues until blank line)
399        let mut caption_end = caption_start + 1;
400        while caption_end < lines.len() && !lines[caption_end].trim().is_empty() {
401            caption_end += 1;
402        }
403        Some((caption_start, caption_end))
404    } else {
405        None
406    }
407}
408
409/// Emit a table caption node.
410/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
411/// the text ends with a balanced `{...}` block, lift it into a structural
412/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
413/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
414/// the id).
415fn emit_caption_line_text(
416    builder: &mut GreenNodeBuilder<'static>,
417    text_with_newline: &str,
418    config: &ParserOptions,
419    lift_trailing_attrs: bool,
420) {
421    let (text, newline_str) = strip_newline(text_with_newline);
422
423    if lift_trailing_attrs
424        && !text.is_empty()
425        && let Some((_attrs, before_attrs, start_brace_pos)) =
426            try_parse_trailing_attributes_with_pos(text)
427    {
428        let trimmed_len = text.trim_end().len();
429        let space = &text[before_attrs.len()..start_brace_pos];
430        let raw_attrs = &text[start_brace_pos..trimmed_len];
431        let trailing_ws = &text[trimmed_len..];
432
433        if !before_attrs.is_empty() {
434            inline_emission::emit_inlines(builder, before_attrs, config);
435        }
436        if !space.is_empty() {
437            builder.token(SyntaxKind::WHITESPACE.into(), space);
438        }
439        builder.start_node(SyntaxKind::ATTRIBUTE.into());
440        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
441        builder.finish_node();
442        if !trailing_ws.is_empty() {
443            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
444        }
445        if !newline_str.is_empty() {
446            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
447        }
448        return;
449    }
450
451    if !text.is_empty() {
452        inline_emission::emit_inlines(builder, text, config);
453    }
454    if !newline_str.is_empty() {
455        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
456    }
457}
458
459fn emit_table_caption(
460    builder: &mut GreenNodeBuilder<'static>,
461    lines: &[&str],
462    start: usize,
463    end: usize,
464    config: &ParserOptions,
465) {
466    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
467
468    let last_idx = (end - start).saturating_sub(1);
469
470    for (i, line) in lines[start..end].iter().enumerate() {
471        let lift_attrs = i == last_idx;
472        if i == 0 {
473            // First line - parse and emit prefix separately
474            let trimmed = line.trim_start();
475            let leading_ws_len = line.len() - trimmed.len();
476
477            // Emit leading whitespace if present
478            if leading_ws_len > 0 {
479                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
480            }
481
482            // Check for caption prefix and emit separately
483            // Calculate where the prefix ends (after trimmed content)
484            let prefix_and_rest = if line.ends_with('\n') {
485                &line[leading_ws_len..line.len() - 1] // Exclude newline
486            } else {
487                &line[leading_ws_len..]
488            };
489
490            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
491                (7, "Table: ")
492            } else if prefix_and_rest.starts_with("table: ") {
493                (7, "table: ")
494            } else if prefix_and_rest.starts_with(": ") {
495                (2, ": ")
496            } else if prefix_and_rest.starts_with(':') {
497                (1, ":")
498            } else {
499                (0, "")
500            };
501
502            if prefix_len > 0 {
503                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
504
505                // Emit rest of line after prefix
506                let rest_start = leading_ws_len + prefix_len;
507                if rest_start < line.len() {
508                    emit_caption_line_text(builder, &line[rest_start..], config, lift_attrs);
509                }
510            } else {
511                // No recognized prefix, emit whole trimmed line
512                emit_caption_line_text(builder, &line[leading_ws_len..], config, lift_attrs);
513            }
514        } else {
515            // Continuation lines - emit with inline parsing (attrs only on last line).
516            emit_caption_line_text(builder, line, config, lift_attrs);
517        }
518    }
519
520    builder.finish_node(); // TABLE_CAPTION
521}
522
523/// Emit a table cell with inline content parsing.
524/// This is the core helper for Phase 7.1 table inline parsing migration.
525fn emit_table_cell(
526    builder: &mut GreenNodeBuilder<'static>,
527    cell_text: &str,
528    config: &ParserOptions,
529) {
530    builder.start_node(SyntaxKind::TABLE_CELL.into());
531
532    // Parse inline content within the cell
533    if !cell_text.is_empty() {
534        inline_emission::emit_inlines(builder, cell_text, config);
535    }
536
537    builder.finish_node(); // TABLE_CELL
538}
539
540/// Determine column alignments based on separator and optional header.
541fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
542    for col in columns.iter_mut() {
543        let sep_slice = &separator_line[col.start..col.end];
544
545        if let Some(header) = header_line {
546            let header_start = column_offset_to_byte_index(header, col.start);
547            let header_end = column_offset_to_byte_index(header, col.end);
548
549            // Extract header text for this column
550            let header_text = if header_start < header_end {
551                header[header_start..header_end].trim()
552            } else if header_start < header.len() {
553                header[header_start..].trim()
554            } else {
555                ""
556            };
557
558            if header_text.is_empty() {
559                col.alignment = Alignment::Default;
560                continue;
561            }
562
563            // Find where the header text starts and ends within the column
564            let header_in_col = &header[header_start..header_end];
565            let text_start = header_in_col.len() - header_in_col.trim_start().len();
566            let text_end = header_in_col.trim_end().len() + text_start;
567
568            // Check dash alignment relative to text
569            let dashes_start = 0; // Dashes start at beginning of sep_slice
570            let dashes_end = sep_slice.len();
571
572            let flush_left = dashes_start == text_start;
573            let flush_right = dashes_end == text_end;
574
575            col.alignment = match (flush_left, flush_right) {
576                (true, true) => Alignment::Default,
577                (true, false) => Alignment::Left,
578                (false, true) => Alignment::Right,
579                (false, false) => Alignment::Center,
580            };
581        } else {
582            // Without header, alignment based on first row (we'll handle this later)
583            col.alignment = Alignment::Default;
584        }
585    }
586}
587
588/// Try to parse a simple table starting at the given position.
589/// Returns the number of lines consumed if successful.
590pub(crate) fn try_parse_simple_table(
591    lines: &[&str],
592    start_pos: usize,
593    builder: &mut GreenNodeBuilder<'static>,
594    config: &ParserOptions,
595) -> Option<usize> {
596    log::trace!("try_parse_simple_table at line {}", start_pos + 1);
597
598    if start_pos >= lines.len() {
599        return None;
600    }
601
602    // Look for a separator line
603    let separator_pos = find_separator_line(lines, start_pos)?;
604    log::trace!("  found separator at line {}", separator_pos + 1);
605
606    let separator_line = lines[separator_pos];
607    let mut columns = try_parse_table_separator(separator_line)?;
608
609    // Determine if there's a header (separator not at start)
610    let has_header = separator_pos > start_pos;
611    let header_line = if has_header {
612        Some(lines[separator_pos - 1])
613    } else {
614        None
615    };
616
617    // Determine alignments
618    determine_alignments(&mut columns, separator_line, header_line);
619
620    // Find table end (blank line or end of input)
621    let end_pos = find_table_end(lines, separator_pos + 1);
622
623    // Must have at least one data row (or it's just a separator)
624    let data_rows = end_pos - separator_pos - 1;
625
626    if data_rows == 0 {
627        return None;
628    }
629
630    // Check for caption before table
631    let caption_before = find_caption_before_table(lines, start_pos);
632
633    // Check for caption after table
634    let caption_after = if caption_before.is_some() {
635        None
636    } else {
637        find_caption_after_table(lines, end_pos)
638    };
639
640    // Build the table
641    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
642
643    // Emit caption before if present
644    if let Some((cap_start, cap_end)) = caption_before {
645        emit_table_caption(builder, lines, cap_start, cap_end, config);
646
647        // Emit blank line between caption and table if present
648        if cap_end < start_pos {
649            for line in lines.iter().take(start_pos).skip(cap_end) {
650                if line.trim().is_empty() {
651                    builder.start_node(SyntaxKind::BLANK_LINE.into());
652                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
653                    builder.finish_node();
654                }
655            }
656        }
657    }
658
659    // Emit header if present
660    if has_header {
661        emit_table_row(
662            builder,
663            lines[separator_pos - 1],
664            &columns,
665            SyntaxKind::TABLE_HEADER,
666            config,
667        );
668    }
669
670    // Emit separator
671    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
672    emit_line_tokens(builder, separator_line);
673    builder.finish_node();
674
675    // Emit data rows
676    for line in lines.iter().take(end_pos).skip(separator_pos + 1) {
677        emit_table_row(builder, line, &columns, SyntaxKind::TABLE_ROW, config);
678    }
679
680    // Emit caption after if present
681    if let Some((cap_start, cap_end)) = caption_after {
682        // Emit blank line before caption if needed
683        if cap_start > end_pos {
684            for line in lines.iter().take(cap_start).skip(end_pos) {
685                if line.trim().is_empty() {
686                    builder.start_node(SyntaxKind::BLANK_LINE.into());
687                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
688                    builder.finish_node();
689                }
690            }
691        }
692        emit_table_caption(builder, lines, cap_start, cap_end, config);
693    }
694
695    builder.finish_node(); // SimpleTable
696
697    // Calculate lines consumed (including captions)
698    let table_start = if let Some((cap_start, _)) = caption_before {
699        cap_start
700    } else if has_header {
701        separator_pos - 1
702    } else {
703        separator_pos
704    };
705
706    let table_end = if let Some((_, cap_end)) = caption_after {
707        cap_end
708    } else {
709        end_pos
710    };
711
712    let lines_consumed = table_end - table_start;
713
714    Some(lines_consumed)
715}
716
717/// Find the position of a separator line starting from pos.
718fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
719    log::trace!("  find_separator_line from line {}", start_pos + 1);
720
721    // Check first line
722    log::trace!("    checking first line: {:?}", lines[start_pos]);
723    if try_parse_table_separator(lines[start_pos]).is_some() {
724        log::trace!("    separator found at first line");
725        return Some(start_pos);
726    }
727
728    // Check second line (for table with header)
729    if start_pos + 1 < lines.len()
730        && !lines[start_pos].trim().is_empty()
731        && try_parse_table_separator(lines[start_pos + 1]).is_some()
732    {
733        return Some(start_pos + 1);
734    }
735    None
736}
737
738/// Find where the table ends (first blank line or end of input).
739fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
740    for i in start_pos..lines.len() {
741        if lines[i].trim().is_empty() {
742            return i;
743        }
744        // Check if this could be a closing separator
745        if try_parse_table_separator(lines[i]).is_some() {
746            // Check if next line is blank or end
747            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
748                return i + 1;
749            }
750        }
751    }
752    lines.len()
753}
754
755/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
756/// Uses column boundaries from the separator line to extract cells.
757fn emit_table_row(
758    builder: &mut GreenNodeBuilder<'static>,
759    line: &str,
760    columns: &[Column],
761    row_kind: SyntaxKind,
762    config: &ParserOptions,
763) {
764    builder.start_node(row_kind.into());
765
766    let (line_without_newline, newline_str) = strip_newline(line);
767
768    // Emit leading whitespace if present
769    let trimmed = line_without_newline.trim_start();
770    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
771    if leading_ws_len > 0 {
772        builder.token(
773            SyntaxKind::WHITESPACE.into(),
774            &line_without_newline[..leading_ws_len],
775        );
776    }
777
778    // Track where we are in the line (for losslessness)
779    let mut current_pos = 0;
780
781    // Extract and emit cells based on column boundaries
782    for col in columns.iter() {
783        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
784        let cell_start = if col.start >= leading_ws_len {
785            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
786        } else {
787            0
788        };
789
790        let cell_end = if col.end >= leading_ws_len {
791            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
792        } else {
793            0
794        };
795
796        // Extract cell text from column bounds. When the column lies entirely
797        // before the trimmed content (col.end <= leading_ws_len) both bounds
798        // clamp to 0; treat that as an empty cell rather than re-emitting the
799        // whole row.
800        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
801            &trimmed[cell_start..cell_end]
802        } else {
803            ""
804        };
805
806        let cell_content = cell_text.trim();
807        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
808
809        // Emit any whitespace from current position to start of cell content
810        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
811        if current_pos < content_abs_pos {
812            builder.token(
813                SyntaxKind::WHITESPACE.into(),
814                &trimmed[current_pos..content_abs_pos],
815            );
816        }
817
818        // Emit cell with inline parsing
819        emit_table_cell(builder, cell_content, config);
820
821        // Update current position to end of cell content
822        current_pos = content_abs_pos + cell_content.len();
823    }
824
825    // Emit any remaining whitespace after last cell
826    if current_pos < trimmed.len() {
827        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
828    }
829
830    // Emit newline if present
831    if !newline_str.is_empty() {
832        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
833    }
834
835    builder.finish_node();
836}
837
838// ============================================================================
839// Pipe Table Parsing
840// ============================================================================
841
842/// Check if a line is a pipe table separator line.
843/// Returns the column alignments if it's a valid separator.
844fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
845    let trimmed = line.trim();
846
847    // Must contain at least one pipe
848    if !trimmed.contains('|') && !trimmed.contains('+') {
849        return None;
850    }
851
852    // Split by pipes (or + for orgtbl variant)
853    let cells: Vec<&str> = if trimmed.contains('+') {
854        // Orgtbl variant: use + as separator in separator line
855        trimmed.split(['|', '+']).collect()
856    } else {
857        trimmed.split('|').collect()
858    };
859
860    let mut alignments = Vec::new();
861
862    for cell in cells {
863        let cell = cell.trim();
864
865        // Skip empty cells (from leading/trailing pipes)
866        if cell.is_empty() {
867            continue;
868        }
869
870        // Must be dashes with optional colons
871        let starts_colon = cell.starts_with(':');
872        let ends_colon = cell.ends_with(':');
873
874        // Remove colons to check if rest is all dashes
875        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
876
877        // Must have at least one dash
878        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
879            return None;
880        }
881
882        // Determine alignment from colon positions
883        let alignment = match (starts_colon, ends_colon) {
884            (true, true) => Alignment::Center,
885            (true, false) => Alignment::Left,
886            (false, true) => Alignment::Right,
887            (false, false) => Alignment::Default,
888        };
889
890        alignments.push(alignment);
891    }
892
893    // Must have at least one column
894    if alignments.is_empty() {
895        None
896    } else {
897        Some(alignments)
898    }
899}
900
/// Split a pipe-table row into its trimmed cell texts.
///
/// A leading `|` is ignored, and a trailing `|` does not produce an extra
/// empty cell. Backslash escapes are consumed as a pair and kept verbatim in
/// the cell text, so `\|` never splits a cell, while an escaped backslash
/// followed by a delimiter (`\\|`) still does.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // The optional leading pipe does not start a cell.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the escape sequence intact; the escaped character
                // (a pipe, another backslash, ...) is never a delimiter.
                current_cell.push('\\');
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // Text after the last pipe is a cell of its own; if the row ended with a
    // pipe there is nothing left to add.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
948
949/// Emit a pipe table row with inline-parsed cells.
950/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
951fn emit_pipe_table_row(
952    builder: &mut GreenNodeBuilder<'static>,
953    line: &str,
954    row_kind: SyntaxKind,
955    config: &ParserOptions,
956) {
957    builder.start_node(row_kind.into());
958
959    let (line_without_newline, newline_str) = strip_newline(line);
960    let trimmed = line_without_newline.trim();
961
962    // Parse cell boundaries
963    let mut cell_starts = Vec::new();
964    let mut cell_ends = Vec::new();
965    let mut in_escape = false;
966
967    // Find all pipe positions (excluding escaped ones)
968    let mut pipe_positions = Vec::new();
969    for (i, ch) in trimmed.char_indices() {
970        if in_escape {
971            in_escape = false;
972            continue;
973        }
974        if ch == '\\' {
975            in_escape = true;
976            continue;
977        }
978        if ch == '|' {
979            pipe_positions.push(i);
980        }
981    }
982
983    // Determine cell boundaries based on pipe positions
984    if pipe_positions.is_empty() {
985        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
986        cell_starts.push(0);
987        cell_ends.push(trimmed.len());
988    } else {
989        // Check if line starts with pipe
990        let start_pipe = pipe_positions.first() == Some(&0);
991        // Check if line ends with pipe
992        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
993
994        if start_pipe {
995            // Skip first pipe
996            for i in 1..pipe_positions.len() {
997                cell_starts.push(pipe_positions[i - 1] + 1);
998                cell_ends.push(pipe_positions[i]);
999            }
1000            // Add last cell if there's no trailing pipe
1001            if !end_pipe {
1002                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1003                cell_ends.push(trimmed.len());
1004            }
1005        } else {
1006            // No leading pipe
1007            cell_starts.push(0);
1008            cell_ends.push(pipe_positions[0]);
1009
1010            for i in 1..pipe_positions.len() {
1011                cell_starts.push(pipe_positions[i - 1] + 1);
1012                cell_ends.push(pipe_positions[i]);
1013            }
1014
1015            // Add last cell if there's no trailing pipe
1016            if !end_pipe {
1017                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1018                cell_ends.push(trimmed.len());
1019            }
1020        }
1021    }
1022
1023    // Emit leading whitespace if present (before trim)
1024    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1025    if leading_ws_len > 0 {
1026        builder.token(
1027            SyntaxKind::WHITESPACE.into(),
1028            &line_without_newline[..leading_ws_len],
1029        );
1030    }
1031
1032    // Emit cells with pipes
1033    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1034        // Emit pipe before cell (except for first cell if no leading pipe)
1035        if *start > 0 {
1036            builder.token(SyntaxKind::TEXT.into(), "|");
1037        } else if idx == 0 && trimmed.starts_with('|') {
1038            // Leading pipe
1039            builder.token(SyntaxKind::TEXT.into(), "|");
1040        }
1041
1042        // Get cell content with its whitespace
1043        let cell_with_ws = &trimmed[*start..*end];
1044        let cell_content = cell_with_ws.trim();
1045
1046        // Emit leading whitespace within cell
1047        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1048        if !cell_leading_ws.is_empty() {
1049            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1050        }
1051
1052        // Emit cell with inline parsing
1053        emit_table_cell(builder, cell_content, config);
1054
1055        // Emit trailing whitespace within cell
1056        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1057        if cell_trailing_ws_start < cell_with_ws.len() {
1058            builder.token(
1059                SyntaxKind::WHITESPACE.into(),
1060                &cell_with_ws[cell_trailing_ws_start..],
1061            );
1062        }
1063    }
1064
1065    // Emit trailing pipe if present
1066    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1067        builder.token(SyntaxKind::TEXT.into(), "|");
1068    }
1069
1070    // Emit trailing whitespace after trim (before newline)
1071    let trailing_ws_start = leading_ws_len + trimmed.len();
1072    if trailing_ws_start < line_without_newline.len() {
1073        builder.token(
1074            SyntaxKind::WHITESPACE.into(),
1075            &line_without_newline[trailing_ws_start..],
1076        );
1077    }
1078
1079    // Emit newline
1080    if !newline_str.is_empty() {
1081        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1082    }
1083
1084    builder.finish_node();
1085}
1086
1087/// Try to parse a pipe table starting at the given position.
1088/// Returns the number of lines consumed if successful.
1089pub(crate) fn try_parse_pipe_table(
1090    lines: &[&str],
1091    start_pos: usize,
1092    builder: &mut GreenNodeBuilder<'static>,
1093    config: &ParserOptions,
1094) -> Option<usize> {
1095    if start_pos + 1 >= lines.len() {
1096        return None;
1097    }
1098
1099    // Check if this line is a caption followed by a table
1100    // If so, the actual table starts after the caption and blank line
1101    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1102        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1103        let mut pos = cap_end;
1104        while pos < lines.len() && lines[pos].trim().is_empty() {
1105            pos += 1;
1106        }
1107        (pos, Some((cap_start, cap_end)))
1108    } else {
1109        (start_pos, None)
1110    };
1111
1112    if actual_start + 1 >= lines.len() {
1113        return None;
1114    }
1115
1116    // First line should have pipes (potential header)
1117    let header_line = lines[actual_start];
1118    if !header_line.contains('|') {
1119        return None;
1120    }
1121
1122    // Second line should be separator
1123    let separator_line = lines[actual_start + 1];
1124    let alignments = try_parse_pipe_separator(separator_line)?;
1125
1126    // Parse header cells
1127    let header_cells = parse_pipe_table_row(header_line);
1128
1129    // Number of columns should match (approximately - be lenient)
1130    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1131        // Only fail if very different
1132        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1133            return None;
1134        }
1135    }
1136
1137    // Find table end (first blank line or end of input)
1138    let mut end_pos = actual_start + 2;
1139    while end_pos < lines.len() {
1140        let line = lines[end_pos];
1141        if line.trim().is_empty() {
1142            break;
1143        }
1144        // Row should have pipes
1145        if !line.contains('|') {
1146            break;
1147        }
1148        end_pos += 1;
1149    }
1150
1151    // Must have at least one data row
1152    if end_pos <= actual_start + 2 {
1153        return None;
1154    }
1155
1156    // Check for caption before table (only if we didn't already detect it)
1157    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1158
1159    // Check for caption after table
1160    let caption_after = if caption_before.is_some() {
1161        None
1162    } else {
1163        find_caption_after_table(lines, end_pos)
1164    };
1165
1166    // Build the pipe table
1167    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1168
1169    // Emit caption before if present
1170    if let Some((cap_start, cap_end)) = caption_before {
1171        emit_table_caption(builder, lines, cap_start, cap_end, config);
1172        // Emit blank line between caption and table if present
1173        if cap_end < actual_start {
1174            for line in lines.iter().take(actual_start).skip(cap_end) {
1175                if line.trim().is_empty() {
1176                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1177                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1178                    builder.finish_node();
1179                }
1180            }
1181        }
1182    }
1183
1184    // Emit header row with inline-parsed cells
1185    emit_pipe_table_row(builder, header_line, SyntaxKind::TABLE_HEADER, config);
1186
1187    // Emit separator
1188    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1189    emit_line_tokens(builder, separator_line);
1190    builder.finish_node();
1191
1192    // Emit data rows with inline-parsed cells
1193    for line in lines.iter().take(end_pos).skip(actual_start + 2) {
1194        emit_pipe_table_row(builder, line, SyntaxKind::TABLE_ROW, config);
1195    }
1196
1197    // Emit caption after if present
1198    if let Some((cap_start, cap_end)) = caption_after {
1199        // Emit blank line before caption if needed
1200        if cap_start > end_pos {
1201            for line in lines.iter().take(cap_start).skip(end_pos) {
1202                if line.trim().is_empty() {
1203                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1204                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1205                    builder.finish_node();
1206                }
1207            }
1208        }
1209        emit_table_caption(builder, lines, cap_start, cap_end, config);
1210    }
1211
1212    builder.finish_node(); // PipeTable
1213
1214    // Calculate lines consumed
1215    let table_start = caption_before
1216        .map(|(start, _)| start)
1217        .unwrap_or(actual_start);
1218    let table_end = if let Some((_, cap_end)) = caption_after {
1219        cap_end
1220    } else {
1221        end_pos
1222    };
1223
1224    Some(table_end - table_start)
1225}
1226
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // Simple tables
    // ------------------------------------------------------------------

    #[test]
    fn test_separator_detection() {
        // Several dash groups form a table separator.
        for separator in [
            "------- ------ ----------   -------",
            "  ---  ---  ---",
            "--- --- ---",
        ] {
            assert!(try_parse_table_separator(separator).is_some());
        }

        // A single dash group is a horizontal rule, not a separator.
        assert!(try_parse_table_separator("-------").is_none());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // header + sep + 2 rows
        assert_eq!(result, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // sep + 2 rows
        assert_eq!(result, Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(try_parse_caption_prefix(caption).is_some());
        }

        // A bare colon has no caption text; ordinary prose has no prefix.
        for not_caption in [":", "Not a caption"] {
            assert!(try_parse_caption_prefix(not_caption).is_none());
        }
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let input = "Term\n: ```\n  code\n  ```\n";
        let tree = crate::parse(input, None);
        let has_kind = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            has_kind(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            has_kind(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !has_kind(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
        let tree = crate::parse(input, None);
        let count_kind =
            |kind: SyntaxKind| tree.descendants().filter(|node| node.kind() == kind).count();

        assert_eq!(
            count_kind(SyntaxKind::TABLE_CAPTION),
            2,
            "expected both captions to attach to tables"
        );
        assert!(
            count_kind(SyntaxKind::DEFINITION_LIST) == 0,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(result, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let input = [
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 2, &mut builder, &ParserOptions::default());

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(result, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // header + sep + row + blank + caption
        assert_eq!(result, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        // Should consume through end of multi-line caption
        assert_eq!(result, Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let input = [
            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
            "--------------  ------------ ------- ---------------- ----------------- ------------",
            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_simple_table(&input, 0, &mut builder, &ParserOptions::default());

        assert_eq!(result, Some(4));
    }

    // ------------------------------------------------------------------
    // Pipe tables
    // ------------------------------------------------------------------

    #[test]
    fn test_pipe_separator_detection() {
        for separator in [
            "|------:|:-----|---------|:------:|",
            "|---|---|",
            "-----|-----:",    // No leading pipe
            "|-----+-------|", // Orgtbl variant
        ] {
            assert!(try_parse_pipe_separator(separator).is_some());
        }

        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(
            aligns,
            [
                Alignment::Right,
                Alignment::Left,
                Alignment::Default,
                Alignment::Center,
            ]
        );
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let cells = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(cells, ["Right", "Left", "Center"]);

        // Without leading/trailing pipes
        let cells2 = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(cells2.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let input = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 1, &mut builder, &ParserOptions::default());

        // header + sep + 2 rows
        assert_eq!(result, Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let input = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 1, &mut builder, &ParserOptions::default());

        assert_eq!(result, Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let input = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 1, &mut builder, &ParserOptions::default());

        // header + sep + row + blank + caption
        assert_eq!(result, Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let input = [
            ": (#tab:base) base R quoting",
            "functions",
            "",
            "| C | D |",
            "|---|---|",
            "| 3 | 4 |",
            "",
        ];

        let mut builder = GreenNodeBuilder::new();
        let result = try_parse_pipe_table(&input, 0, &mut builder, &ParserOptions::default());

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(result, Some(6));
    }
}
1535
1536// ============================================================================
1537// Grid Table Parsing
1538// ============================================================================
1539
1540/// Check if a line is a grid table row separator (starts with +, contains -, ends with +).
1541/// Returns Some(vec of column info) if valid, None otherwise.
1542fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
1543    let trimmed = line.trim_start();
1544    let leading_spaces = line.len() - trimmed.len();
1545
1546    // Must have leading spaces <= 3 to not be a code block
1547    if leading_spaces > 3 {
1548        return None;
1549    }
1550
1551    // Must start with + and end with +
1552    if !trimmed.starts_with('+') || !trimmed.trim_end().ends_with('+') {
1553        return None;
1554    }
1555
1556    // Split by + to get column segments
1557    let trimmed = trimmed.trim_end();
1558    let segments: Vec<&str> = trimmed.split('+').collect();
1559
1560    // Need at least 3 parts: empty before first +, column(s), empty after last +
1561    if segments.len() < 3 {
1562        return None;
1563    }
1564
1565    let mut columns = Vec::new();
1566
1567    // Parse each segment between + signs
1568    for segment in segments.iter().skip(1).take(segments.len() - 2) {
1569        if segment.is_empty() {
1570            continue;
1571        }
1572
1573        // Segment must be dashes/equals with optional colons for alignment
1574        let seg_trimmed = *segment;
1575
1576        // Get the fill character (after removing colons)
1577        let inner = seg_trimmed.trim_start_matches(':').trim_end_matches(':');
1578
1579        // Must be all dashes or all equals
1580        if inner.is_empty() {
1581            return None;
1582        }
1583
1584        let first_char = inner.chars().next().unwrap();
1585        if first_char != '-' && first_char != '=' {
1586            return None;
1587        }
1588
1589        if !inner.chars().all(|c| c == first_char) {
1590            return None;
1591        }
1592
1593        let is_header_sep = first_char == '=';
1594
1595        columns.push(GridColumn {
1596            is_header_separator: is_header_sep,
1597            width: seg_trimmed.chars().count(),
1598        });
1599    }
1600
1601    if columns.is_empty() {
1602        None
1603    } else {
1604        Some(columns)
1605    }
1606}
1607
/// One column of a grid table, as described by a single `+…+` border segment.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the border segment is filled with `=` (the border that
    /// closes a header) rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the border segment between two `+` signs,
    /// including any `:` alignment markers.
    width: usize,
}
1614
1615fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1616    let mut end_byte = start_byte;
1617    let mut display_cols = 0usize;
1618
1619    for (offset, ch) in line[start_byte..].char_indices() {
1620        if ch == '|' {
1621            let sep_byte = start_byte + offset;
1622            return (sep_byte, sep_byte + 1);
1623        }
1624        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1625        if display_cols + ch_width > width {
1626            break;
1627        }
1628        display_cols += ch_width;
1629        end_byte = start_byte + offset + ch.len_utf8();
1630        if display_cols >= width {
1631            break;
1632        }
1633    }
1634
1635    // If the width budget is exhausted before seeing a separator (for example
1636    // because of padding/layout drift), advance to the next literal separator
1637    // to keep row slicing aligned and preserve losslessness.
1638    let mut sep_byte = end_byte;
1639    while sep_byte < line.len() {
1640        let mut chars = line[sep_byte..].chars();
1641        let Some(ch) = chars.next() else {
1642            break;
1643        };
1644        if ch == '|' {
1645            return (sep_byte, sep_byte + 1);
1646        }
1647        sep_byte += ch.len_utf8();
1648    }
1649
1650    (end_byte, end_byte)
1651}
1652
/// Tell whether `line` looks like a content line of a grid table.
///
/// Such a line is indented by at most three bytes of whitespace, starts with
/// `|`, and (ignoring trailing whitespace) ends with `|` for a normal row or
/// with `+` for a spanning-style continuation line.
fn is_grid_content_row(line: &str) -> bool {
    let body = line.trim_start();
    let indent = line.len() - body.len();
    let body = body.trim_end();

    indent <= 3 && body.starts_with('|') && body.ends_with(|c: char| c == '|' || c == '+')
}
1666
1667/// Extract cell contents from a single grid table row line.
1668/// Returns a vector of cell contents (trimmed) based on column boundaries.
1669/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1670fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1671    let (line_content, _) = strip_newline(line);
1672    let line_trimmed = line_content.trim();
1673
1674    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1675        return vec![String::new(); _columns.len()];
1676    }
1677
1678    let mut cells = Vec::with_capacity(_columns.len());
1679    let mut pos_byte = 1; // Skip leading pipe
1680
1681    for col in _columns {
1682        let col_idx = cells.len();
1683        if pos_byte >= line_trimmed.len() {
1684            cells.push(String::new());
1685            continue;
1686        }
1687
1688        let start_byte = pos_byte;
1689        let end_byte = if col_idx + 1 == _columns.len() {
1690            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1691        } else {
1692            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1693            pos_byte = next_start;
1694            end
1695        };
1696        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1697        if col_idx + 1 == _columns.len() {
1698            pos_byte = line_trimmed.len();
1699        }
1700    }
1701
1702    cells
1703}
1704
1705/// Extract cell contents from multiple grid table row lines (for multi-line cells).
1706/// Concatenates cell contents across lines with newlines, then trims.
1707fn extract_grid_cells_multiline(lines: &[&str], columns: &[GridColumn]) -> Vec<String> {
1708    if lines.is_empty() {
1709        return vec![String::new(); columns.len()];
1710    }
1711
1712    extract_grid_cells_from_line(lines[0], columns)
1713}
1714
1715/// Emit a grid table row with inline-parsed cells.
1716/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1717/// then continuation lines as raw TEXT for losslessness.
1718fn emit_grid_table_row(
1719    builder: &mut GreenNodeBuilder<'static>,
1720    lines: &[&str],
1721    columns: &[GridColumn],
1722    row_kind: SyntaxKind,
1723    config: &ParserOptions,
1724) {
1725    if lines.is_empty() {
1726        return;
1727    }
1728
1729    // Extract cell contents from the first line.
1730    let cell_contents = extract_grid_cells_multiline(lines, columns);
1731
1732    builder.start_node(row_kind.into());
1733
1734    // Emit first line with TABLE_CELL nodes
1735    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1736    let first_line = lines[0];
1737    let (line_without_newline, newline_str) = strip_newline(first_line);
1738    let trimmed = line_without_newline.trim();
1739    let expected_pipe_count = columns.len().saturating_add(1);
1740    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1741
1742    // Rows that don't contain all expected column separators (spanning-style rows)
1743    // must be emitted verbatim for losslessness.
1744    if actual_pipe_count != expected_pipe_count {
1745        emit_line_tokens(builder, first_line);
1746        for line in lines.iter().skip(1) {
1747            emit_line_tokens(builder, line);
1748        }
1749        builder.finish_node();
1750        return;
1751    }
1752
1753    // Emit leading whitespace
1754    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1755    if leading_ws_len > 0 {
1756        builder.token(
1757            SyntaxKind::WHITESPACE.into(),
1758            &line_without_newline[..leading_ws_len],
1759        );
1760    }
1761
1762    // Emit leading pipe
1763    if trimmed.starts_with('|') {
1764        builder.token(SyntaxKind::TEXT.into(), "|");
1765    }
1766
1767    // Emit each cell based on fixed column widths from separators
1768    let mut pos_byte = 1usize; // after leading pipe
1769    for (idx, cell_content) in cell_contents.iter().enumerate() {
1770        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1771            let start_byte = pos_byte;
1772            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1773                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1774            } else {
1775                let (end, next_start) =
1776                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1777                pos_byte = next_start;
1778                end
1779            };
1780            let slice = &trimmed[start_byte..end_byte];
1781            if idx + 1 == columns.len() {
1782                pos_byte = trimmed.len();
1783            }
1784            slice
1785        } else {
1786            ""
1787        };
1788
1789        // Emit leading whitespace in cell
1790        let cell_trimmed = part.trim();
1791        let ws_start_len = part.len() - part.trim_start().len();
1792        if ws_start_len > 0 {
1793            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1794        }
1795
1796        // Emit TABLE_CELL with inline parsing
1797        emit_table_cell(builder, cell_content, config);
1798
1799        // Emit trailing whitespace in cell
1800        let ws_end_start = ws_start_len + cell_trimmed.len();
1801        if ws_end_start < part.len() {
1802            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1803        }
1804
1805        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1806        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1807            builder.token(SyntaxKind::TEXT.into(), "|");
1808        }
1809    }
1810
1811    // Emit trailing whitespace before newline
1812    let trailing_ws_start = leading_ws_len + trimmed.len();
1813    if trailing_ws_start < line_without_newline.len() {
1814        builder.token(
1815            SyntaxKind::WHITESPACE.into(),
1816            &line_without_newline[trailing_ws_start..],
1817        );
1818    }
1819
1820    // Emit newline
1821    if !newline_str.is_empty() {
1822        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1823    }
1824
1825    // Emit continuation lines as TEXT for losslessness
1826    for line in lines.iter().skip(1) {
1827        emit_line_tokens(builder, line);
1828    }
1829
1830    builder.finish_node();
1831}
1832
1833/// Try to parse a grid table starting at the given position.
1834/// Returns the number of lines consumed if successful.
1835pub(crate) fn try_parse_grid_table(
1836    lines: &[&str],
1837    start_pos: usize,
1838    builder: &mut GreenNodeBuilder<'static>,
1839    config: &ParserOptions,
1840) -> Option<usize> {
1841    if start_pos >= lines.len() {
1842        return None;
1843    }
1844
1845    // Check if this line is a caption followed by a table
1846    // If so, the actual table starts after the caption and blank line
1847    let (actual_start, caption_before) = if is_caption_followed_by_table(lines, start_pos) {
1848        let (cap_start, cap_end) = caption_range_starting_at(lines, start_pos)?;
1849        let mut pos = cap_end;
1850        while pos < lines.len() && lines[pos].trim().is_empty() {
1851            pos += 1;
1852        }
1853        (pos, Some((cap_start, cap_end)))
1854    } else {
1855        (start_pos, None)
1856    };
1857
1858    if actual_start >= lines.len() {
1859        return None;
1860    }
1861
1862    // First line must be a grid separator
1863    let first_line = lines[actual_start];
1864    let _columns = try_parse_grid_separator(first_line)?;
1865
1866    // Track table structure
1867    let mut end_pos = actual_start + 1;
1868    let mut found_header_sep = false;
1869    let mut in_footer = false;
1870
1871    // Scan table lines
1872    while end_pos < lines.len() {
1873        let line = lines[end_pos];
1874
1875        // Check for blank line (table ends)
1876        if line.trim().is_empty() {
1877            break;
1878        }
1879
1880        // Check for separator line
1881        if let Some(sep_cols) = try_parse_grid_separator(line) {
1882            // Check if this is a header separator (=)
1883            if sep_cols.iter().any(|c| c.is_header_separator) {
1884                if !found_header_sep {
1885                    found_header_sep = true;
1886                } else if !in_footer {
1887                    // Second = separator starts footer
1888                    in_footer = true;
1889                }
1890            }
1891            end_pos += 1;
1892            continue;
1893        }
1894
1895        // Check for content row
1896        if is_grid_content_row(line) {
1897            end_pos += 1;
1898            continue;
1899        }
1900
1901        // Not a valid grid table line - table ends
1902        break;
1903    }
1904
1905    // Must have consumed at least 3 lines (top separator, content, bottom separator)
1906    // Or just top + content rows that end with a separator
1907    if end_pos <= actual_start + 1 {
1908        return None;
1909    }
1910
1911    // Last consumed line should be a separator for a well-formed table
1912    // But we'll be lenient and accept tables ending with content rows
1913
1914    // Check for caption before table (only if we didn't already detected it)
1915    let caption_before = caption_before.or_else(|| find_caption_before_table(lines, actual_start));
1916
1917    // Check for caption after table
1918    let caption_after = if caption_before.is_some() {
1919        None
1920    } else {
1921        find_caption_after_table(lines, end_pos)
1922    };
1923
1924    // Build the grid table
1925    builder.start_node(SyntaxKind::GRID_TABLE.into());
1926
1927    // Emit caption before if present
1928    if let Some((cap_start, cap_end)) = caption_before {
1929        emit_table_caption(builder, lines, cap_start, cap_end, config);
1930        // Emit blank line between caption and table if present
1931        if cap_end < actual_start {
1932            for line in lines.iter().take(actual_start).skip(cap_end) {
1933                if line.trim().is_empty() {
1934                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1935                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1936                    builder.finish_node();
1937                }
1938            }
1939        }
1940    }
1941
1942    // Track whether we've passed the header separator
1943    let mut past_header_sep = false;
1944    let mut in_footer_section = false;
1945    let mut current_row_lines: Vec<&str> = Vec::new();
1946    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
1947
1948    // Emit table rows - accumulate multi-line cells
1949    for line in lines.iter().take(end_pos).skip(actual_start) {
1950        if let Some(sep_cols) = try_parse_grid_separator(line) {
1951            // Separator line - emit any accumulated row first
1952            if !current_row_lines.is_empty() {
1953                emit_grid_table_row(
1954                    builder,
1955                    &current_row_lines,
1956                    &sep_cols,
1957                    current_row_kind,
1958                    config,
1959                );
1960                current_row_lines.clear();
1961            }
1962
1963            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
1964
1965            if is_header_sep {
1966                if !past_header_sep {
1967                    // This is the header/body separator
1968                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1969                    emit_line_tokens(builder, line);
1970                    builder.finish_node();
1971                    past_header_sep = true;
1972                } else {
1973                    // Footer separator
1974                    if !in_footer_section {
1975                        in_footer_section = true;
1976                    }
1977                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1978                    emit_line_tokens(builder, line);
1979                    builder.finish_node();
1980                }
1981            } else {
1982                // Regular separator (row boundary)
1983                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1984                emit_line_tokens(builder, line);
1985                builder.finish_node();
1986            }
1987        } else if is_grid_content_row(line) {
1988            // Content row - accumulate for multi-line cells
1989            current_row_kind = if !past_header_sep && found_header_sep {
1990                SyntaxKind::TABLE_HEADER
1991            } else if in_footer_section {
1992                SyntaxKind::TABLE_FOOTER
1993            } else {
1994                SyntaxKind::TABLE_ROW
1995            };
1996
1997            current_row_lines.push(line);
1998        }
1999    }
2000
2001    // Emit any remaining accumulated row
2002    if !current_row_lines.is_empty() {
2003        // Use first separator's columns for cell boundaries
2004        if let Some(sep_cols) = try_parse_grid_separator(lines[actual_start]) {
2005            emit_grid_table_row(
2006                builder,
2007                &current_row_lines,
2008                &sep_cols,
2009                current_row_kind,
2010                config,
2011            );
2012        }
2013    }
2014
2015    // Emit caption after if present
2016    if let Some((cap_start, cap_end)) = caption_after {
2017        if cap_start > end_pos {
2018            for line in lines.iter().take(cap_start).skip(end_pos) {
2019                if line.trim().is_empty() {
2020                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2021                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2022                    builder.finish_node();
2023                }
2024            }
2025        }
2026        emit_table_caption(builder, lines, cap_start, cap_end, config);
2027    }
2028
2029    builder.finish_node(); // GRID_TABLE
2030
2031    // Calculate lines consumed
2032    let table_start = caption_before
2033        .map(|(start, _)| start)
2034        .unwrap_or(actual_start);
2035    let table_end = if let Some((_, cap_end)) = caption_after {
2036        cap_end
2037    } else {
2038        end_pos
2039    };
2040
2041    Some(table_end - table_start)
2042}
2043
#[cfg(test)]
mod grid_table_tests {
    use super::*;

    /// Run `try_parse_grid_table` on `input` from line `start` with default
    /// options and a throwaway builder, returning the reported line count.
    fn consumed(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(input, start, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let accepted = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in accepted {
            assert!(try_parse_grid_separator(line).is_some());
        }

        let rejected = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in rejected {
            assert!(try_parse_grid_separator(line).is_none());
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header.iter().all(|c| c.is_header_separator));

        let plain = try_parse_grid_separator("+---+---+").unwrap();
        assert!(plain.iter().all(|c| !c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for row in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(row));
        }
        // A separator is not content, and neither is a line without pipes.
        for not_row in ["+---+---+", "no pipes here"] {
            assert!(!is_grid_content_row(not_row));
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts at the top border; the count still covers
        // caption + blank + table.
        assert_eq!(consumed(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed(&input, 0), Some(7));
    }
}
2198
2199// ============================================================================
2200// Multiline Table Parsing
2201// ============================================================================
2202
2203/// Check if a line is a multiline table separator (continuous dashes).
2204/// Multiline table separators span the full width and are all dashes.
2205/// Returns Some(columns) if valid, None otherwise.
2206fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2207    let trimmed = line.trim_start();
2208    let leading_spaces = line.len() - trimmed.len();
2209
2210    // Must have leading spaces <= 3 to not be a code block
2211    if leading_spaces > 3 {
2212        return None;
2213    }
2214
2215    let trimmed = trimmed.trim_end();
2216
2217    // Must be all dashes (continuous line of dashes)
2218    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2219        return None;
2220    }
2221
2222    // Must have at least 3 dashes
2223    if trimmed.len() < 3 {
2224        return None;
2225    }
2226
2227    // This is a full-width separator - columns will be determined by column separator lines
2228    Some(vec![Column {
2229        start: leading_spaces,
2230        end: leading_spaces + trimmed.len(),
2231        alignment: Alignment::Default,
2232    }])
2233}
2234
2235/// Check if a line is a column separator line for multiline tables.
2236/// Column separators have dashes with spaces between them to define columns.
2237fn is_column_separator(line: &str) -> bool {
2238    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2239}
2240
2241fn is_headerless_single_row_without_blank(
2242    lines: &[&str],
2243    row_start: usize,
2244    row_end: usize,
2245    columns: &[Column],
2246) -> bool {
2247    if row_start >= row_end {
2248        return false;
2249    }
2250
2251    if row_end - row_start == 1 {
2252        return false;
2253    }
2254
2255    let Some(last_col) = columns.last() else {
2256        return false;
2257    };
2258
2259    for line in lines.iter().take(row_end).skip(row_start + 1) {
2260        let (content, _) = strip_newline(line);
2261        let prefix_end = last_col.start.min(content.len());
2262        if !content[..prefix_end].trim().is_empty() {
2263            return false;
2264        }
2265    }
2266
2267    true
2268}
2269
2270/// Try to parse a multiline table starting at the given position.
2271/// Returns the number of lines consumed if successful.
2272pub(crate) fn try_parse_multiline_table(
2273    lines: &[&str],
2274    start_pos: usize,
2275    builder: &mut GreenNodeBuilder<'static>,
2276    config: &ParserOptions,
2277) -> Option<usize> {
2278    if start_pos >= lines.len() {
2279        return None;
2280    }
2281
2282    let first_line = lines[start_pos];
2283
2284    // First line can be either:
2285    // 1. A full-width dash separator (for tables with headers)
2286    // 2. A column separator (for headerless tables)
2287    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2288    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2289    let headerless_columns = if is_column_sep_start {
2290        try_parse_table_separator(first_line)
2291    } else {
2292        None
2293    };
2294
2295    if !is_full_width_start && !is_column_sep_start {
2296        return None;
2297    }
2298
2299    // Look ahead to find the structure
2300    let mut pos = start_pos + 1;
2301    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2302    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2303    let mut has_header = false;
2304    let mut found_blank_line = false;
2305    let mut found_closing_sep = false;
2306    let mut content_line_count = 0usize;
2307
2308    // Scan for header section and column separator
2309    while pos < lines.len() {
2310        let line = lines[pos];
2311
2312        // Check for column separator (defines columns) - only if we started with full-width
2313        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2314            found_column_sep = true;
2315            column_sep_pos = pos;
2316            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2317            pos += 1;
2318            continue;
2319        }
2320
2321        // Check for blank line (row separator in body)
2322        if line.trim().is_empty() {
2323            found_blank_line = true;
2324            pos += 1;
2325            // Check if next line is a valid closing separator for this table shape.
2326            if pos < lines.len() {
2327                let next = lines[pos];
2328                let is_valid_closer = if is_full_width_start {
2329                    try_parse_multiline_separator(next).is_some()
2330                } else {
2331                    is_column_separator(next)
2332                };
2333                if is_valid_closer {
2334                    found_closing_sep = true;
2335                    pos += 1; // Include the closing separator
2336                    break;
2337                }
2338            }
2339            continue;
2340        }
2341
2342        // Check for closing full-width dashes (only for full-width-start tables).
2343        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2344            found_closing_sep = true;
2345            pos += 1;
2346            break;
2347        }
2348
2349        // Check for closing column separator (for headerless tables)
2350        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2351            found_closing_sep = true;
2352            pos += 1;
2353            break;
2354        }
2355
2356        // Content row
2357        content_line_count += 1;
2358        pos += 1;
2359    }
2360
2361    // Must have found a column separator to be a valid multiline table
2362    if !found_column_sep {
2363        return None;
2364    }
2365
2366    // Must have had at least one blank line between rows (distinguishes from simple tables)
2367    if !found_blank_line {
2368        if !is_column_sep_start {
2369            return None;
2370        }
2371        let columns = headerless_columns.as_deref()?;
2372        if !is_headerless_single_row_without_blank(lines, start_pos + 1, pos - 1, columns) {
2373            return None;
2374        }
2375    }
2376
2377    // Must have a closing separator
2378    if !found_closing_sep {
2379        return None;
2380    }
2381
2382    // Must have consumed more than just the opening separator
2383    if pos <= start_pos + 2 {
2384        return None;
2385    }
2386
2387    let end_pos = pos;
2388
2389    // Extract column boundaries from the separator line
2390    let columns =
2391        try_parse_table_separator(lines[column_sep_pos]).expect("Column separator must be valid");
2392
2393    // Check for caption before table
2394    let caption_before = find_caption_before_table(lines, start_pos);
2395
2396    // Check for caption after table
2397    let caption_after = if caption_before.is_some() {
2398        None
2399    } else {
2400        find_caption_after_table(lines, end_pos)
2401    };
2402
2403    // Build the multiline table
2404    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2405
2406    // Emit caption before if present
2407    if let Some((cap_start, cap_end)) = caption_before {
2408        emit_table_caption(builder, lines, cap_start, cap_end, config);
2409
2410        // Emit blank line between caption and table if present
2411        if cap_end < start_pos {
2412            for line in lines.iter().take(start_pos).skip(cap_end) {
2413                if line.trim().is_empty() {
2414                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2415                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2416                    builder.finish_node();
2417                }
2418            }
2419        }
2420    }
2421
2422    // Emit opening separator
2423    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2424    emit_line_tokens(builder, lines[start_pos]);
2425    builder.finish_node();
2426
2427    // Track state for emitting
2428    let mut in_header = has_header;
2429    let mut current_row_lines: Vec<&str> = Vec::new();
2430
2431    for (i, line) in lines.iter().enumerate().take(end_pos).skip(start_pos + 1) {
2432        // Column separator (header/body divider)
2433        if i == column_sep_pos {
2434            // Emit any accumulated header lines
2435            if !current_row_lines.is_empty() {
2436                emit_multiline_table_row(
2437                    builder,
2438                    &current_row_lines,
2439                    &columns,
2440                    SyntaxKind::TABLE_HEADER,
2441                    config,
2442                );
2443                current_row_lines.clear();
2444            }
2445
2446            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2447            emit_line_tokens(builder, line);
2448            builder.finish_node();
2449            in_header = false;
2450            continue;
2451        }
2452
2453        // Closing separator (full-width or column separator at end)
2454        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2455            // Emit any accumulated row lines
2456            if !current_row_lines.is_empty() {
2457                let kind = if in_header {
2458                    SyntaxKind::TABLE_HEADER
2459                } else {
2460                    SyntaxKind::TABLE_ROW
2461                };
2462                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2463                current_row_lines.clear();
2464            }
2465
2466            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2467            emit_line_tokens(builder, line);
2468            builder.finish_node();
2469            continue;
2470        }
2471
2472        // Blank line (row separator)
2473        if line.trim().is_empty() {
2474            // Emit accumulated row
2475            if !current_row_lines.is_empty() {
2476                let kind = if in_header {
2477                    SyntaxKind::TABLE_HEADER
2478                } else {
2479                    SyntaxKind::TABLE_ROW
2480                };
2481                emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2482                current_row_lines.clear();
2483            }
2484
2485            builder.start_node(SyntaxKind::BLANK_LINE.into());
2486            builder.token(SyntaxKind::BLANK_LINE.into(), line);
2487            builder.finish_node();
2488            continue;
2489        }
2490
2491        // Content line - accumulate for current row
2492        current_row_lines.push(line);
2493    }
2494
2495    // Emit any remaining accumulated lines
2496    if !current_row_lines.is_empty() {
2497        let kind = if in_header {
2498            SyntaxKind::TABLE_HEADER
2499        } else {
2500            SyntaxKind::TABLE_ROW
2501        };
2502        emit_multiline_table_row(builder, &current_row_lines, &columns, kind, config);
2503    }
2504
2505    // Emit caption after if present
2506    if let Some((cap_start, cap_end)) = caption_after {
2507        if cap_start > end_pos {
2508            for line in lines.iter().take(cap_start).skip(end_pos) {
2509                if line.trim().is_empty() {
2510                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2511                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2512                    builder.finish_node();
2513                }
2514            }
2515        }
2516        emit_table_caption(builder, lines, cap_start, cap_end, config);
2517    }
2518
2519    builder.finish_node(); // MultilineTable
2520
2521    // Calculate lines consumed
2522    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2523    let table_end = if let Some((_, cap_end)) = caption_after {
2524        cap_end
2525    } else {
2526        end_pos
2527    };
2528
2529    Some(table_end - table_start)
2530}
2531
2532/// Extract cell contents from first line only (for CST emission).
2533/// Multi-line content will be in continuation TEXT tokens.
2534fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2535    let (line_content, _) = strip_newline(line);
2536    let mut cells = Vec::new();
2537
2538    for column in columns.iter() {
2539        let column_start = column_offset_to_byte_index(line_content, column.start);
2540        let column_end = column_offset_to_byte_index(line_content, column.end);
2541
2542        // Extract FULL text for this column (including whitespace)
2543        let cell_text = if column_start < column_end {
2544            &line_content[column_start..column_end]
2545        } else if column_start < line_content.len() {
2546            &line_content[column_start..]
2547        } else {
2548            ""
2549        };
2550
2551        cells.push(cell_text.to_string());
2552    }
2553
2554    cells
2555}
2556
2557/// Emit a multiline table row with inline parsing (Phase 7.1).
2558fn emit_multiline_table_row(
2559    builder: &mut GreenNodeBuilder<'static>,
2560    lines: &[&str],
2561    columns: &[Column],
2562    kind: SyntaxKind,
2563    config: &ParserOptions,
2564) {
2565    if lines.is_empty() {
2566        return;
2567    }
2568
2569    // Extract cell contents from first line only (for CST losslessness)
2570    let first_line = lines[0];
2571    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2572
2573    builder.start_node(kind.into());
2574
2575    // Emit first line with TABLE_CELL nodes
2576    let (trimmed, newline_str) = strip_newline(first_line);
2577    let mut current_pos = 0;
2578
2579    for (col_idx, column) in columns.iter().enumerate() {
2580        let cell_text = &cell_contents[col_idx];
2581        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2582        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2583
2584        // Emit whitespace before cell
2585        if current_pos < cell_start {
2586            builder.token(
2587                SyntaxKind::WHITESPACE.into(),
2588                &trimmed[current_pos..cell_start],
2589            );
2590        }
2591
2592        // Emit cell with inline parsing (first line content only)
2593        emit_table_cell(builder, cell_text, config);
2594
2595        current_pos = cell_end;
2596    }
2597
2598    // Emit trailing whitespace
2599    if current_pos < trimmed.len() {
2600        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2601    }
2602
2603    // Emit newline
2604    if !newline_str.is_empty() {
2605        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2606    }
2607
2608    // Emit continuation lines as TEXT to preserve exact line structure
2609    for line in lines.iter().skip(1) {
2610        emit_line_tokens(builder, line);
2611    }
2612
2613    builder.finish_node();
2614}
2615
#[cfg(test)]
mod multiline_table_tests {
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Emits a single table cell for `source` with default parser options and
    /// returns it as the root syntax node.
    ///
    /// Every cell test shares two expectations, so they are checked here: the
    /// root node is a `TABLE_CELL`, and its text reproduces `source` exactly.
    fn cell_from(source: &str) -> SyntaxNode {
        let mut sink = GreenNodeBuilder::new();
        emit_table_cell(&mut sink, source, &ParserOptions::default());
        let cell = SyntaxNode::new_root(sink.finish());

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), source);
        cell
    }

    /// Asserts that `cell` has exactly one child *node* (tokens are not
    /// counted) and that this node is of kind `expected`.
    fn assert_sole_node_child(cell: &SyntaxNode, expected: SyntaxKind) {
        let nodes: Vec<_> = cell.children().collect();
        assert_eq!(nodes.len(), 1);
        assert_eq!(nodes[0].kind(), expected);
    }

    #[test]
    fn test_multiline_separator_detection() {
        // Lines that must be recognised as a multiline separator; the last
        // one checks that leading spaces are tolerated.
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----",
        ];
        for line in accepted {
            assert!(
                try_parse_multiline_separator(line).is_some(),
                "expected a separator: {line:?}"
            );
        }

        // Lines that must be rejected, paired with the reason.
        let rejected = [
            ("--", "too short"),
            ("--- ---", "has spaces"),
            ("+---+", "grid separator"),
        ];
        for (line, reason) in rejected {
            assert!(
                try_parse_multiline_separator(line).is_none(),
                "expected no separator ({reason}): {line:?}"
            );
        }
    }

    #[test]
    fn test_basic_multiline_table() {
        let lines = vec![
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        // The reported value matches the nine lines that make up the table.
        assert_eq!(outcome, Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        let lines = vec![
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        assert_eq!(outcome, Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        let lines = vec![
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        // A headerless block whose only row occupies one line must be rejected.
        assert!(outcome.is_none());
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        let lines = vec![
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        assert_eq!(outcome, Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let lines = vec![
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        // table (6 lines) + blank + caption
        assert_eq!(outcome, Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let lines = vec![
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        assert_eq!(outcome, Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        // Ordinary document content that opens with a spaced-dash rule and
        // later contains a bare `---` rule; none of it is a table.
        let lines = vec![
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        assert!(outcome.is_none());
    }

    #[test]
    fn test_not_multiline_table() {
        // A simple table must not be picked up by the multiline parser.
        let lines = vec![
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ];

        let mut sink = GreenNodeBuilder::new();
        let outcome = try_parse_multiline_table(&lines, 0, &mut sink, &ParserOptions::default());

        // Rejected because the first line isn't a full-width separator.
        assert!(outcome.is_none());
    }

    // Phase 7.1: unit tests for the emit_table_cell() helper.

    #[test]
    fn test_emit_table_cell_plain_text() {
        let cell = cell_from("Cell");

        // Plain text yields exactly one element, a TEXT token.
        let parts: Vec<_> = cell.children_with_tokens().collect();
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].kind(), SyntaxKind::TEXT);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        let cell = cell_from("*italic*");

        // The only child node is EMPHASIS.
        assert_sole_node_child(&cell, SyntaxKind::EMPHASIS);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        let cell = cell_from("`code`");

        // The only child node is INLINE_CODE.
        assert_sole_node_child(&cell, SyntaxKind::INLINE_CODE);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        let cell = cell_from("[text](url)");

        // The only child node is LINK.
        assert_sole_node_child(&cell, SyntaxKind::LINK);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        let cell = cell_from("**bold**");

        // The only child node is STRONG.
        assert_sole_node_child(&cell, SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let cell = cell_from("Text **bold** and `code`");

        // At least four elements are expected (text, strong, text, inline
        // code); only the first two kinds are pinned down here.
        let parts: Vec<_> = cell.children_with_tokens().collect();
        assert!(parts.len() >= 4);
        assert_eq!(parts[0].kind(), SyntaxKind::TEXT);
        assert_eq!(parts[1].kind(), SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let cell = cell_from("");

        // An empty cell carries no child nodes or tokens at all.
        assert_eq!(cell.children_with_tokens().count(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        // The escaped pipe must survive verbatim; `cell_from` already checks
        // that the cell's text equals the input.
        cell_from(r"A \| B");
    }
}