Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Horizontal alignment of a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Left-aligned column.
    Left,
    /// Right-aligned column.
    Right,
    /// Centered column.
    Center,
    /// No explicit alignment was specified.
    Default,
}
23
/// Column information extracted from the separator line.
///
/// As produced by `extract_columns`, `start..end` is a half-open range that
/// covers one run of dashes, measured from the beginning of the separator
/// line (leading indentation included).
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index) in the line
    start: usize,
    /// End position (byte index, exclusive) in the line
    end: usize,
    /// Column alignment
    alignment: Alignment,
}
34
35/// Try to detect if a line is a table separator line.
36/// Returns Some(column positions) if it's a valid separator.
37pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
38    let trimmed = line.trim_start();
39    // Strip trailing newline if present (CRLF or LF)
40    let (trimmed, newline_str) = strip_newline(trimmed);
41    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
42
43    // Must have leading spaces <= 3 to not be a code block
44    if leading_spaces > 3 {
45        return None;
46    }
47
48    // Simple tables only use dashed separators.
49    if trimmed.contains('*') || trimmed.contains('_') {
50        return None;
51    }
52
53    // Must contain at least one dash
54    if !trimmed.contains('-') {
55        return None;
56    }
57
58    // A separator line consists of dashes and spaces
59    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
60        return None;
61    }
62
63    // Must not be a horizontal rule.
64    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
65    if dash_groups.len() <= 1 {
66        return None;
67    }
68
69    // Extract column positions from dash groups
70    let columns = extract_columns(trimmed, leading_spaces);
71
72    if columns.is_empty() {
73        return None;
74    }
75
76    Some(columns)
77}
78
79/// Extract column positions from a separator line.
80fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
81    let mut columns = Vec::new();
82    let mut in_dashes = false;
83    let mut col_start = 0;
84
85    for (i, ch) in separator.char_indices() {
86        match ch {
87            '-' if !in_dashes => {
88                col_start = i + offset;
89                in_dashes = true;
90            }
91            ' ' if in_dashes => {
92                columns.push(Column {
93                    start: col_start,
94                    end: i + offset,
95                    alignment: Alignment::Default, // Will be determined later
96                });
97                in_dashes = false;
98            }
99            _ => {}
100        }
101    }
102
103    // Handle last column
104    if in_dashes {
105        columns.push(Column {
106            start: col_start,
107            end: separator.len() + offset,
108            alignment: Alignment::Default,
109        });
110    }
111
112    columns
113}
114
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Separator lines are ASCII, so their column offsets count characters;
/// data rows can hold multibyte characters and therefore need this
/// translation before they are sliced. Offsets at or past the end of the
/// line yield `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
125
/// Recognise a table-caption marker at the start of `line`.
///
/// Accepts `Table:`, `table:`, or a bare `:` that is immediately followed by
/// whitespace, with at most three bytes of leading whitespace. On success
/// returns `(prefix_len, rest)`, where `prefix_len` covers the indentation
/// plus the marker and `rest` is everything after the marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let content = line.trim_start();
    let indent = line.len() - content.len();

    // More than three leading spaces would make this a code block.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = content.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows (Pandoc-style); this
    // keeps fenced-div fences such as ":::" from being read as captions.
    let rest = content.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
154
/// Check if a line could be the start of a table caption.
///
/// True exactly when `try_parse_caption_prefix` recognises a caption marker
/// (`Table:`, `table:`, or `:` followed by whitespace) on the line.
fn is_table_caption_start(line: &str) -> bool {
    try_parse_caption_prefix(line).is_some()
}
159
/// Check whether `line`, ignoring leading whitespace, begins with exactly
/// one colon (i.e. a `:` that is not the start of `::` or `:::`).
fn is_bare_colon_caption_start(line: &str) -> bool {
    match line.trim_start().strip_prefix(':') {
        Some(after_colon) => !after_colon.starts_with(':'),
        None => false,
    }
}
164
165fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
166    let Some((_, rest)) = try_parse_caption_prefix(line) else {
167        return false;
168    };
169    let trimmed = rest.trim_start();
170    trimmed.starts_with("```") || trimmed.starts_with("~~~")
171}
172
/// Check whether `line` is a fenced-div fence: after optional leading
/// whitespace, three or more colons followed by either nothing or
/// whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let content = line.trim_start();
    let after_colons = content.trim_start_matches(':');
    let colon_run = content.len() - after_colons.len();

    colon_run >= 3
        && after_colons
            .chars()
            .next()
            .map_or(true, char::is_whitespace)
}
182
183fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
184    if !is_table_caption_start(lines[pos]) {
185        return false;
186    }
187
188    if is_bare_colon_caption_start(lines[pos])
189        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
190    {
191        return false;
192    }
193
194    // Avoid stealing definition-list definitions (":   ...") as table captions.
195    if is_bare_colon_caption_start(lines[pos])
196        && pos > 0
197        && !lines[pos - 1].trim().is_empty()
198        && !line_is_fenced_div_fence(lines[pos - 1])
199    {
200        return false;
201    }
202    true
203}
204
/// Check if a line could be the start of a grid table.
/// Grid tables start with a separator line like +---+---+ or +===+===+
///
/// True exactly when `try_parse_grid_separator` accepts the line.
fn is_grid_table_start(line: &str) -> bool {
    try_parse_grid_separator(line).is_some()
}
210
/// Check if a line could be the start of a multiline table.
/// Multiline tables start with either:
/// - A full-width dash separator (----)
/// - A column separator with dashes and spaces (---- ---- ----)
///
/// The first form is detected via `try_parse_multiline_separator`, the
/// second via `is_column_separator`.
fn is_multiline_table_start(line: &str) -> bool {
    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
}
218
219/// Check if there's a table following a potential caption at this position.
220/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
221pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
222    if caption_pos >= lines.len() {
223        return false;
224    }
225
226    // Caption must start with a caption prefix
227    if !is_valid_caption_start_before_table(lines, caption_pos) {
228        return false;
229    }
230
231    let mut pos = caption_pos + 1;
232
233    // Skip continuation lines of caption (non-blank lines).
234    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
235    // must not be folded into the caption.
236    while pos < lines.len()
237        && !lines[pos].trim().is_empty()
238        && !line_is_fenced_div_fence(lines[pos])
239    {
240        // If we hit a table separator, we found a table
241        if try_parse_table_separator(lines[pos]).is_some() {
242            return true;
243        }
244        pos += 1;
245    }
246
247    // Skip one blank line
248    if pos < lines.len() && lines[pos].trim().is_empty() {
249        pos += 1;
250    }
251
252    // Check for table at next position
253    if pos < lines.len() {
254        let line = lines[pos];
255
256        // Check for grid table start (+---+---+ or +===+===+)
257        if is_grid_table_start(line) {
258            return true;
259        }
260
261        // Check for multiline table start (---- or ---- ---- ----)
262        if is_multiline_table_start(line) {
263            return true;
264        }
265
266        // Could be a separator line (simple/pipe table, headerless)
267        if try_parse_table_separator(line).is_some() {
268            return true;
269        }
270
271        // Or could be a header line followed by separator (simple/pipe table with header)
272        if pos + 1 < lines.len() && !line.trim().is_empty() {
273            let next_line = lines[pos + 1];
274            if try_parse_table_separator(next_line).is_some()
275                || try_parse_pipe_separator(next_line).is_some()
276            {
277                return true;
278            }
279        }
280    }
281
282    false
283}
284
285fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
286    if start >= lines.len() || !is_table_caption_start(lines[start]) {
287        return None;
288    }
289    let mut end = start + 1;
290    while end < lines.len()
291        && !lines[end].trim().is_empty()
292        && !line_is_fenced_div_fence(lines[end])
293    {
294        end += 1;
295    }
296    Some((start, end))
297}
298
299/// Find caption before table (if any).
300/// Returns (caption_start, caption_end) positions, or None.
301fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
302    if table_start == 0 {
303        return None;
304    }
305
306    // Look backward for a caption
307    // Caption must be immediately before table (with possible blank line between)
308    let mut pos = table_start - 1;
309
310    // Skip one blank line if present
311    if lines[pos].trim().is_empty() {
312        if pos == 0 {
313            return None;
314        }
315        pos -= 1;
316    }
317
318    // Now pos points to the last non-blank line before the table
319    // This could be the last line of a multiline caption, or a single-line caption
320    let caption_end = pos + 1; // End is exclusive
321
322    // If this line is NOT a caption start, it might be a continuation line
323    // Scan backward through non-blank lines to find the caption start
324    if !is_valid_caption_start_before_table(lines, pos) {
325        // Not a caption start - check if there's a caption start above
326        let mut scan_pos = pos;
327        while scan_pos > 0 {
328            scan_pos -= 1;
329            let line = lines[scan_pos];
330
331            // If we hit a blank line or fenced-div fence, we've gone too far
332            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
333                return None;
334            }
335
336            // If we find a caption start, this is the beginning of the multiline caption
337            if is_valid_caption_start_before_table(lines, scan_pos) {
338                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
339                    return None;
340                }
341                if previous_nonblank_looks_like_table(lines, scan_pos) {
342                    return None;
343                }
344                return Some((scan_pos, caption_end));
345            }
346        }
347        // Scanned to beginning without finding caption start
348        None
349    } else {
350        if pos > 0 && !lines[pos - 1].trim().is_empty() {
351            return None;
352        }
353        if previous_nonblank_looks_like_table(lines, pos) {
354            return None;
355        }
356        // This line is a caption start - return the range
357        Some((pos, caption_end))
358    }
359}
360
361fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
362    if pos == 0 {
363        return false;
364    }
365    let mut i = pos;
366    while i > 0 {
367        i -= 1;
368        let line = lines[i].trim();
369        if line.is_empty() {
370            continue;
371        }
372        return line_looks_like_table_syntax(line);
373    }
374    false
375}
376
377fn line_looks_like_table_syntax(line: &str) -> bool {
378    if line.starts_with('|') && line.matches('|').count() >= 2 {
379        return true;
380    }
381    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
382        return true;
383    }
384    try_parse_table_separator(line).is_some()
385        || try_parse_pipe_separator(line).is_some()
386        || try_parse_grid_separator(line).is_some()
387}
388
389/// Find caption after table (if any).
390/// Returns (caption_start, caption_end) positions, or None.
391fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
392    if table_end >= lines.len() {
393        return None;
394    }
395
396    let mut pos = table_end;
397
398    // Skip one blank line if present
399    if pos < lines.len() && lines[pos].trim().is_empty() {
400        pos += 1;
401    }
402
403    if pos >= lines.len() {
404        return None;
405    }
406
407    // Check if this line is a caption
408    if is_table_caption_start(lines[pos]) {
409        let caption_start = pos;
410        // Find end of caption (continues until blank line or fenced-div fence)
411        let mut caption_end = caption_start + 1;
412        while caption_end < lines.len()
413            && !lines[caption_end].trim().is_empty()
414            && !line_is_fenced_div_fence(lines[caption_end])
415        {
416            caption_end += 1;
417        }
418        Some((caption_start, caption_end))
419    } else {
420        None
421    }
422}
423
424/// Emit a table caption node.
425/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
426/// the text ends with a balanced `{...}` block, lift it into a structural
427/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
428/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
429/// the id).
430fn emit_caption_line_text(
431    builder: &mut GreenNodeBuilder<'static>,
432    text_with_newline: &str,
433    config: &ParserOptions,
434    lift_trailing_attrs: bool,
435) {
436    let (text, newline_str) = strip_newline(text_with_newline);
437
438    if lift_trailing_attrs
439        && !text.is_empty()
440        && let Some((_attrs, before_attrs, start_brace_pos)) =
441            try_parse_trailing_attributes_with_pos(text)
442    {
443        let trimmed_len = text.trim_end().len();
444        let space = &text[before_attrs.len()..start_brace_pos];
445        let raw_attrs = &text[start_brace_pos..trimmed_len];
446        let trailing_ws = &text[trimmed_len..];
447
448        if !before_attrs.is_empty() {
449            inline_emission::emit_inlines(builder, before_attrs, config, false);
450        }
451        if !space.is_empty() {
452            builder.token(SyntaxKind::WHITESPACE.into(), space);
453        }
454        emit_attribute_node(builder, raw_attrs);
455        if !trailing_ws.is_empty() {
456            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
457        }
458        if !newline_str.is_empty() {
459            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
460        }
461        return;
462    }
463
464    if !text.is_empty() {
465        inline_emission::emit_inlines(builder, text, config, false);
466    }
467    if !newline_str.is_empty() {
468        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
469    }
470}
471
472fn emit_table_caption(
473    builder: &mut GreenNodeBuilder<'static>,
474    lines: &[&str],
475    start: usize,
476    end: usize,
477    config: &ParserOptions,
478) {
479    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
480
481    let last_idx = (end - start).saturating_sub(1);
482
483    for (i, line) in lines[start..end].iter().enumerate() {
484        let lift_attrs = i == last_idx;
485        if i == 0 {
486            // First line - parse and emit prefix separately
487            let trimmed = line.trim_start();
488            let leading_ws_len = line.len() - trimmed.len();
489
490            // Emit leading whitespace if present
491            if leading_ws_len > 0 {
492                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
493            }
494
495            // Check for caption prefix and emit separately
496            // Calculate where the prefix ends (after trimmed content)
497            let prefix_and_rest = if line.ends_with('\n') {
498                &line[leading_ws_len..line.len() - 1] // Exclude newline
499            } else {
500                &line[leading_ws_len..]
501            };
502
503            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
504                (7, "Table: ")
505            } else if prefix_and_rest.starts_with("table: ") {
506                (7, "table: ")
507            } else if prefix_and_rest.starts_with(": ") {
508                (2, ": ")
509            } else if prefix_and_rest.starts_with(':') {
510                (1, ":")
511            } else {
512                (0, "")
513            };
514
515            if prefix_len > 0 {
516                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
517
518                // Emit rest of line after prefix
519                let rest_start = leading_ws_len + prefix_len;
520                if rest_start < line.len() {
521                    emit_caption_line_text(builder, &line[rest_start..], config, lift_attrs);
522                }
523            } else {
524                // No recognized prefix, emit whole trimmed line
525                emit_caption_line_text(builder, &line[leading_ws_len..], config, lift_attrs);
526            }
527        } else {
528            // Continuation lines - emit with inline parsing (attrs only on last line).
529            emit_caption_line_text(builder, line, config, lift_attrs);
530        }
531    }
532
533    builder.finish_node(); // TABLE_CAPTION
534}
535
/// Emit a table cell with inline content parsing.
///
/// Always produces a `TABLE_CELL` node; the node is left without children
/// when `cell_text` is empty, otherwise `cell_text` is handed to
/// `inline_emission::emit_inlines`.
fn emit_table_cell(
    builder: &mut GreenNodeBuilder<'static>,
    cell_text: &str,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::TABLE_CELL.into());

    // Parse inline content within the cell (skipped for empty cells)
    if !cell_text.is_empty() {
        inline_emission::emit_inlines(builder, cell_text, config, false);
    }

    builder.finish_node(); // TABLE_CELL
}
552
553/// Determine column alignments based on separator and optional header.
554fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
555    for col in columns.iter_mut() {
556        let sep_slice = &separator_line[col.start..col.end];
557
558        if let Some(header) = header_line {
559            let header_start = column_offset_to_byte_index(header, col.start);
560            let header_end = column_offset_to_byte_index(header, col.end);
561
562            // Extract header text for this column
563            let header_text = if header_start < header_end {
564                header[header_start..header_end].trim()
565            } else if header_start < header.len() {
566                header[header_start..].trim()
567            } else {
568                ""
569            };
570
571            if header_text.is_empty() {
572                col.alignment = Alignment::Default;
573                continue;
574            }
575
576            // Find where the header text starts and ends within the column
577            let header_in_col = &header[header_start..header_end];
578            let text_start = header_in_col.len() - header_in_col.trim_start().len();
579            let text_end = header_in_col.trim_end().len() + text_start;
580
581            // Check dash alignment relative to text
582            let dashes_start = 0; // Dashes start at beginning of sep_slice
583            let dashes_end = sep_slice.len();
584
585            let flush_left = dashes_start == text_start;
586            let flush_right = dashes_end == text_end;
587
588            col.alignment = match (flush_left, flush_right) {
589                (true, true) => Alignment::Default,
590                (true, false) => Alignment::Left,
591                (false, true) => Alignment::Right,
592                (false, false) => Alignment::Center,
593            };
594        } else {
595            // Without header, alignment based on first row (we'll handle this later)
596            col.alignment = Alignment::Default;
597        }
598    }
599}
600
601/// Try to parse a simple table starting at the given position.
602/// Returns the number of lines consumed if successful.
603pub(crate) fn try_parse_simple_table(
604    window: &StrippedLines<'_, '_>,
605    builder: &mut GreenNodeBuilder<'static>,
606    config: &ParserOptions,
607) -> Option<usize> {
608    let lines = window.raw();
609    let start_pos = window.pos();
610    log::trace!("try_parse_simple_table at line {}", start_pos + 1);
611
612    if start_pos >= lines.len() {
613        return None;
614    }
615
616    // Detection scans run against the container-prefix-stripped view so a
617    // table nested in `list → blockquote` (e.g. `- >  a   b`) has its `  > `
618    // prefix removed before the separator/column-shape checks. With an empty
619    // prefix `stripped == lines`. Emission re-emits the prefix bytes as
620    // tokens via the window; captions/blank lines still read raw `lines`.
621    let stripped = window.strip_all();
622
623    // Look for a separator line
624    let separator_pos = find_separator_line(&stripped, start_pos)?;
625    log::trace!("  found separator at line {}", separator_pos + 1);
626
627    let separator_line = stripped[separator_pos];
628    let mut columns = try_parse_table_separator(separator_line)?;
629
630    // Determine if there's a header (separator not at start)
631    let has_header = separator_pos > start_pos;
632    let header_line = if has_header {
633        Some(stripped[separator_pos - 1])
634    } else {
635        None
636    };
637
638    // Determine alignments
639    determine_alignments(&mut columns, separator_line, header_line);
640
641    // Find table end (blank line or end of input)
642    let end_pos = find_table_end(&stripped, separator_pos + 1);
643
644    // Must have at least one data row (or it's just a separator)
645    let data_rows = end_pos - separator_pos - 1;
646
647    if data_rows == 0 {
648        return None;
649    }
650
651    // Check for caption before table
652    let caption_before = find_caption_before_table(&stripped, start_pos);
653
654    // Check for caption after table
655    let caption_after = if caption_before.is_some() {
656        None
657    } else {
658        find_caption_after_table(&stripped, end_pos)
659    };
660
661    // Build the table
662    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
663
664    // Emit caption before if present
665    if let Some((cap_start, cap_end)) = caption_before {
666        emit_table_caption(builder, lines, cap_start, cap_end, config);
667
668        // Emit blank line between caption and table if present
669        if cap_end < start_pos {
670            for line in lines.iter().take(start_pos).skip(cap_end) {
671                if line.trim().is_empty() {
672                    builder.start_node(SyntaxKind::BLANK_LINE.into());
673                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
674                    builder.finish_node();
675                }
676            }
677        }
678    }
679
680    // Emit header if present. On the dispatch line the core already emitted
681    // the container prefix; only continuation rows re-emit it (via the window
682    // inside `emit_table_row`).
683    if has_header {
684        emit_table_row(
685            builder,
686            window,
687            separator_pos - 1,
688            &columns,
689            SyntaxKind::TABLE_HEADER,
690            config,
691        );
692    }
693
694    // Emit separator, re-emitting any continuation-line container prefix
695    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
696    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
697    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
698    emit_line_tokens(builder, separator_tail);
699    builder.finish_node();
700
701    // Emit data rows (always continuation lines)
702    for idx in (separator_pos + 1)..end_pos {
703        emit_table_row(
704            builder,
705            window,
706            idx,
707            &columns,
708            SyntaxKind::TABLE_ROW,
709            config,
710        );
711    }
712
713    // Emit caption after if present
714    if let Some((cap_start, cap_end)) = caption_after {
715        // Emit blank line before caption if needed
716        if cap_start > end_pos {
717            for line in lines.iter().take(cap_start).skip(end_pos) {
718                if line.trim().is_empty() {
719                    builder.start_node(SyntaxKind::BLANK_LINE.into());
720                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
721                    builder.finish_node();
722                }
723            }
724        }
725        emit_table_caption(builder, lines, cap_start, cap_end, config);
726    }
727
728    builder.finish_node(); // SimpleTable
729
730    // Calculate lines consumed (including captions)
731    let table_start = if let Some((cap_start, _)) = caption_before {
732        cap_start
733    } else if has_header {
734        separator_pos - 1
735    } else {
736        separator_pos
737    };
738
739    let table_end = if let Some((_, cap_end)) = caption_after {
740        cap_end
741    } else {
742        end_pos
743    };
744
745    let lines_consumed = table_end - table_start;
746
747    Some(lines_consumed)
748}
749
750/// Find the position of a separator line starting from pos.
751fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
752    log::trace!("  find_separator_line from line {}", start_pos + 1);
753
754    // Check first line
755    log::trace!("    checking first line: {:?}", lines[start_pos]);
756    if try_parse_table_separator(lines[start_pos]).is_some() {
757        log::trace!("    separator found at first line");
758        return Some(start_pos);
759    }
760
761    // Check second line (for table with header)
762    if start_pos + 1 < lines.len()
763        && !lines[start_pos].trim().is_empty()
764        && try_parse_table_separator(lines[start_pos + 1]).is_some()
765    {
766        return Some(start_pos + 1);
767    }
768    None
769}
770
771/// Find where the table ends (first blank line or end of input).
772fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
773    for i in start_pos..lines.len() {
774        if lines[i].trim().is_empty() {
775            return i;
776        }
777        // Check if this could be a closing separator
778        if try_parse_table_separator(lines[i]).is_some() {
779            // Check if next line is blank or end
780            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
781                return i + 1;
782            }
783        }
784    }
785    lines.len()
786}
787
/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
/// Uses column boundaries from the separator line to extract cells.
///
/// The row is wrapped in a node of kind `row_kind`. Inside it the line is
/// emitted in order: leading whitespace, then for every column the text
/// between the previous cell's content and this cell's content (as a
/// `WHITESPACE` token) followed by a `TABLE_CELL` node, then whatever is
/// left of the line, and finally the line ending. One `TABLE_CELL` is
/// emitted per column, even when the cell is empty.
///
/// NOTE(review): the text between two cells and after the last cell is
/// emitted as `WHITESPACE` without checking its content, so row text that
/// falls outside the dash runs ends up in `WHITESPACE` tokens — confirm
/// this is intended.
fn emit_table_row(
    builder: &mut GreenNodeBuilder<'static>,
    window: &StrippedLines<'_, '_>,
    abs_idx: usize,
    columns: &[Column],
    row_kind: SyntaxKind,
    config: &ParserOptions,
) {
    builder.start_node(row_kind.into());

    // On continuation lines the leading `  > ` prefix is re-emitted as
    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
    // stripped tail returned; the dispatch line just strips its (already
    // core-emitted) prefix. Empty prefix ⇒ the raw line.
    let line = window.emit_or_dispatch_tail(builder, abs_idx);

    let (line_without_newline, newline_str) = strip_newline(line);

    // Emit leading whitespace if present
    let trimmed = line_without_newline.trim_start();
    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
    if leading_ws_len > 0 {
        builder.token(
            SyntaxKind::WHITESPACE.into(),
            &line_without_newline[..leading_ws_len],
        );
    }

    // Track where we are in the line (for losslessness); this is a byte
    // index into `trimmed` marking the end of what has been emitted so far.
    let mut current_pos = 0;

    // Extract and emit cells based on column boundaries
    for col in columns.iter() {
        // Calculate actual positions in the trimmed line (accounting for leading whitespace).
        // Column offsets are relative to the untrimmed line, so the leading
        // whitespace length is subtracted before converting to a byte index;
        // a bound that falls inside the leading whitespace clamps to 0.
        let cell_start = if col.start >= leading_ws_len {
            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
        } else {
            0
        };

        let cell_end = if col.end >= leading_ws_len {
            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
        } else {
            0
        };

        // Extract cell text from column bounds. When the column lies entirely
        // before the trimmed content (col.end <= leading_ws_len) both bounds
        // clamp to 0; treat that as an empty cell rather than re-emitting the
        // whole row.
        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
            &trimmed[cell_start..cell_end]
        } else {
            ""
        };

        // The cell's content is its text without surrounding whitespace;
        // `cell_content_start` is the length of the leading whitespace.
        let cell_content = cell_text.trim();
        let cell_content_start = cell_text.len() - cell_text.trim_start().len();

        // Emit any whitespace from current position to start of cell content
        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
        if current_pos < content_abs_pos {
            builder.token(
                SyntaxKind::WHITESPACE.into(),
                &trimmed[current_pos..content_abs_pos],
            );
        }

        // Emit cell with inline parsing
        emit_table_cell(builder, cell_content, config);

        // Update current position to end of cell content
        current_pos = content_abs_pos + cell_content.len();
    }

    // Emit any remaining whitespace after last cell
    if current_pos < trimmed.len() {
        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
    }

    // Emit newline if present
    if !newline_str.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
    }

    builder.finish_node();
}
877
878// ============================================================================
879// Pipe Table Parsing
880// ============================================================================
881
882/// Check if a line is a pipe table separator line.
883/// Returns the column alignments if it's a valid separator.
884fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
885    let trimmed = line.trim();
886
887    // Must contain at least one pipe
888    if !trimmed.contains('|') && !trimmed.contains('+') {
889        return None;
890    }
891
892    // Split by pipes (or + for orgtbl variant)
893    let cells: Vec<&str> = if trimmed.contains('+') {
894        // Orgtbl variant: use + as separator in separator line
895        trimmed.split(['|', '+']).collect()
896    } else {
897        trimmed.split('|').collect()
898    };
899
900    let mut alignments = Vec::new();
901
902    for cell in cells {
903        let cell = cell.trim();
904
905        // Skip empty cells (from leading/trailing pipes)
906        if cell.is_empty() {
907            continue;
908        }
909
910        // Must be dashes with optional colons
911        let starts_colon = cell.starts_with(':');
912        let ends_colon = cell.ends_with(':');
913
914        // Remove colons to check if rest is all dashes
915        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
916
917        // Must have at least one dash
918        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
919            return None;
920        }
921
922        // Determine alignment from colon positions
923        let alignment = match (starts_colon, ends_colon) {
924            (true, true) => Alignment::Center,
925            (true, false) => Alignment::Left,
926            (false, true) => Alignment::Right,
927            (false, false) => Alignment::Default,
928        };
929
930        alignments.push(alignment);
931    }
932
933    // Must have at least one column
934    if alignments.is_empty() {
935        None
936    } else {
937        Some(alignments)
938    }
939}
940
/// Split a pipe table row into cells.
///
/// The line is trimmed, an optional leading pipe is skipped, and the rest is
/// split on unescaped pipes. Each cell is returned trimmed. A trailing empty
/// cell (as left behind by a closing pipe) is dropped.
///
/// A backslash always escapes the character that follows it, and both are
/// kept verbatim in the cell text. So `\|` does not split the row, while
/// `\\|` is an escaped backslash followed by a real cell delimiter. This
/// matches the escape tracking used by `emit_pipe_table_row`.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = trimmed.chars();
    let mut at_line_start = true;

    while let Some(ch) = chars.next() {
        // True only for the very first character of the trimmed line.
        let is_first = std::mem::replace(&mut at_line_start, false);
        match ch {
            '\\' => {
                // Keep the backslash and whatever it escapes together so an
                // escaped backslash cannot be mistaken for a pipe escape.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            // Leading pipe: opens the row, does not end a cell.
            '|' if is_first => {}
            '|' => {
                // End current cell, start new one
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // Add last cell if it's not empty (it would be empty if line ended with pipe)
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
988
989/// Emit a pipe table row with inline-parsed cells.
990/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
991fn emit_pipe_table_row(
992    builder: &mut GreenNodeBuilder<'static>,
993    window: &StrippedLines<'_, '_>,
994    abs_idx: usize,
995    row_kind: SyntaxKind,
996    config: &ParserOptions,
997) {
998    builder.start_node(row_kind.into());
999
1000    // On continuation lines (separator/data rows under a list+blockquote
1001    // container) the leading `  > ` prefix is not consumed by the core;
1002    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1003    // and returns the stripped tail. On the dispatch line the core already
1004    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1005    // With an empty prefix (non-nested tables) both are no-ops returning
1006    // the raw line.
1007    let line = if abs_idx == window.dispatch_pos() {
1008        window.dispatch_tail()
1009    } else {
1010        window.emit_prefix_at(builder, abs_idx)
1011    };
1012
1013    let (line_without_newline, newline_str) = strip_newline(line);
1014    let trimmed = line_without_newline.trim();
1015
1016    // Parse cell boundaries
1017    let mut cell_starts = Vec::new();
1018    let mut cell_ends = Vec::new();
1019    let mut in_escape = false;
1020
1021    // Find all pipe positions (excluding escaped ones)
1022    let mut pipe_positions = Vec::new();
1023    for (i, ch) in trimmed.char_indices() {
1024        if in_escape {
1025            in_escape = false;
1026            continue;
1027        }
1028        if ch == '\\' {
1029            in_escape = true;
1030            continue;
1031        }
1032        if ch == '|' {
1033            pipe_positions.push(i);
1034        }
1035    }
1036
1037    // Determine cell boundaries based on pipe positions
1038    if pipe_positions.is_empty() {
1039        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1040        cell_starts.push(0);
1041        cell_ends.push(trimmed.len());
1042    } else {
1043        // Check if line starts with pipe
1044        let start_pipe = pipe_positions.first() == Some(&0);
1045        // Check if line ends with pipe
1046        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1047
1048        if start_pipe {
1049            // Skip first pipe
1050            for i in 1..pipe_positions.len() {
1051                cell_starts.push(pipe_positions[i - 1] + 1);
1052                cell_ends.push(pipe_positions[i]);
1053            }
1054            // Add last cell if there's no trailing pipe
1055            if !end_pipe {
1056                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1057                cell_ends.push(trimmed.len());
1058            }
1059        } else {
1060            // No leading pipe
1061            cell_starts.push(0);
1062            cell_ends.push(pipe_positions[0]);
1063
1064            for i in 1..pipe_positions.len() {
1065                cell_starts.push(pipe_positions[i - 1] + 1);
1066                cell_ends.push(pipe_positions[i]);
1067            }
1068
1069            // Add last cell if there's no trailing pipe
1070            if !end_pipe {
1071                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1072                cell_ends.push(trimmed.len());
1073            }
1074        }
1075    }
1076
1077    // Emit leading whitespace if present (before trim)
1078    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1079    if leading_ws_len > 0 {
1080        builder.token(
1081            SyntaxKind::WHITESPACE.into(),
1082            &line_without_newline[..leading_ws_len],
1083        );
1084    }
1085
1086    // Emit cells with pipes
1087    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1088        // Emit pipe before cell (except for first cell if no leading pipe)
1089        if *start > 0 {
1090            builder.token(SyntaxKind::TEXT.into(), "|");
1091        } else if idx == 0 && trimmed.starts_with('|') {
1092            // Leading pipe
1093            builder.token(SyntaxKind::TEXT.into(), "|");
1094        }
1095
1096        // Get cell content with its whitespace
1097        let cell_with_ws = &trimmed[*start..*end];
1098        let cell_content = cell_with_ws.trim();
1099
1100        // Emit leading whitespace within cell
1101        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1102        if !cell_leading_ws.is_empty() {
1103            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1104        }
1105
1106        // Emit cell with inline parsing
1107        emit_table_cell(builder, cell_content, config);
1108
1109        // Emit trailing whitespace within cell
1110        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1111        if cell_trailing_ws_start < cell_with_ws.len() {
1112            builder.token(
1113                SyntaxKind::WHITESPACE.into(),
1114                &cell_with_ws[cell_trailing_ws_start..],
1115            );
1116        }
1117    }
1118
1119    // Emit trailing pipe if present
1120    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1121        builder.token(SyntaxKind::TEXT.into(), "|");
1122    }
1123
1124    // Emit trailing whitespace after trim (before newline)
1125    let trailing_ws_start = leading_ws_len + trimmed.len();
1126    if trailing_ws_start < line_without_newline.len() {
1127        builder.token(
1128            SyntaxKind::WHITESPACE.into(),
1129            &line_without_newline[trailing_ws_start..],
1130        );
1131    }
1132
1133    // Emit newline
1134    if !newline_str.is_empty() {
1135        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1136    }
1137
1138    builder.finish_node();
1139}
1140
1141/// Try to parse a pipe table starting at the given position.
1142/// Returns the number of lines consumed if successful.
1143pub(crate) fn try_parse_pipe_table(
1144    window: &StrippedLines<'_, '_>,
1145    builder: &mut GreenNodeBuilder<'static>,
1146    config: &ParserOptions,
1147) -> Option<usize> {
1148    let lines = window.raw();
1149    let start_pos = window.pos();
1150    if start_pos + 1 >= lines.len() {
1151        return None;
1152    }
1153
1154    // Detection scans run against a container-prefix-stripped view, so a
1155    // table nested in `list → blockquote` (e.g. `- > | a | b |`) has its
1156    // `  > ` prefix removed before the separator/cell shape checks. Each
1157    // entry is a no-alloc tail slice of the matching raw line; with an
1158    // empty prefix `stripped == lines`. The dispatch line uses the
1159    // emission-safe line-0 strip (its prefix was consumed by the core);
1160    // every other line gets the full continuation strip. Emission still
1161    // reads raw `lines` so the prefix bytes can be re-emitted as tokens.
1162    let stripped = window.strip_all();
1163
1164    // Check if this line is a caption followed by a table
1165    // If so, the actual table starts after the caption and blank line
1166    let (actual_start, caption_before) = if is_caption_followed_by_table(&stripped, start_pos) {
1167        let (cap_start, cap_end) = caption_range_starting_at(&stripped, start_pos)?;
1168        let mut pos = cap_end;
1169        while pos < stripped.len() && stripped[pos].trim().is_empty() {
1170            pos += 1;
1171        }
1172        (pos, Some((cap_start, cap_end)))
1173    } else {
1174        (start_pos, None)
1175    };
1176
1177    if actual_start + 1 >= lines.len() {
1178        return None;
1179    }
1180
1181    // First line should have pipes (potential header)
1182    if !stripped[actual_start].contains('|') {
1183        return None;
1184    }
1185
1186    // Second line should be separator
1187    let alignments = try_parse_pipe_separator(stripped[actual_start + 1])?;
1188
1189    // Parse header cells
1190    let header_cells = parse_pipe_table_row(stripped[actual_start]);
1191
1192    // Number of columns should match (approximately - be lenient)
1193    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1194        // Only fail if very different
1195        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1196            return None;
1197        }
1198    }
1199
1200    // Find table end (first blank line or end of input)
1201    let mut end_pos = actual_start + 2;
1202    while end_pos < stripped.len() {
1203        let line = stripped[end_pos];
1204        if line.trim().is_empty() {
1205            break;
1206        }
1207        // Row should have pipes
1208        if !line.contains('|') {
1209            break;
1210        }
1211        end_pos += 1;
1212    }
1213
1214    // Must have at least one data row
1215    if end_pos <= actual_start + 2 {
1216        return None;
1217    }
1218
1219    // Check for caption before table (only if we didn't already detect it)
1220    let caption_before =
1221        caption_before.or_else(|| find_caption_before_table(&stripped, actual_start));
1222
1223    // Check for caption after table
1224    let caption_after = if caption_before.is_some() {
1225        None
1226    } else {
1227        find_caption_after_table(&stripped, end_pos)
1228    };
1229
1230    // Build the pipe table
1231    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1232
1233    // Emit caption before if present
1234    if let Some((cap_start, cap_end)) = caption_before {
1235        emit_table_caption(builder, lines, cap_start, cap_end, config);
1236        // Emit blank line between caption and table if present
1237        if cap_end < actual_start {
1238            for line in lines.iter().take(actual_start).skip(cap_end) {
1239                if line.trim().is_empty() {
1240                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1241                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1242                    builder.finish_node();
1243                }
1244            }
1245        }
1246    }
1247
1248    // Emit header row with inline-parsed cells. On the dispatch line the
1249    // core already emitted the container prefix; only when the header is a
1250    // continuation line (e.g. it follows a caption-before line) do we emit
1251    // the prefix here.
1252    emit_pipe_table_row(
1253        builder,
1254        window,
1255        actual_start,
1256        SyntaxKind::TABLE_HEADER,
1257        config,
1258    );
1259
1260    // Emit separator, re-emitting any continuation-line container prefix
1261    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1262    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1263    let sep_idx = actual_start + 1;
1264    let separator_tail = if sep_idx == window.dispatch_pos() {
1265        window.dispatch_tail()
1266    } else {
1267        window.emit_prefix_at(builder, sep_idx)
1268    };
1269    emit_line_tokens(builder, separator_tail);
1270    builder.finish_node();
1271
1272    // Emit data rows with inline-parsed cells (always continuation lines)
1273    for idx in (actual_start + 2)..end_pos {
1274        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1275    }
1276
1277    // Emit caption after if present
1278    if let Some((cap_start, cap_end)) = caption_after {
1279        // Emit blank line before caption if needed
1280        if cap_start > end_pos {
1281            for line in lines.iter().take(cap_start).skip(end_pos) {
1282                if line.trim().is_empty() {
1283                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1284                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1285                    builder.finish_node();
1286                }
1287            }
1288        }
1289        emit_table_caption(builder, lines, cap_start, cap_end, config);
1290    }
1291
1292    builder.finish_node(); // PipeTable
1293
1294    // Calculate lines consumed
1295    let table_start = caption_before
1296        .map(|(start, _)| start)
1297        .unwrap_or(actual_start);
1298    let table_end = if let Some((_, cap_end)) = caption_after {
1299        cap_end
1300    } else {
1301        end_pos
1302    };
1303
1304    Some(table_end - table_start)
1305}
1306
#[cfg(test)]
mod tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Runs the simple-table parser on `input` with the window positioned at
    /// `pos` and an empty container prefix; returns the lines consumed.
    fn simple_table_consumed(input: &[&str], pos: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(input, pos, &prefix);
        try_parse_simple_table(&window, &mut builder, &ParserOptions::default())
    }

    /// Runs the pipe-table parser on `input` with the window positioned at
    /// `pos` and an empty container prefix; returns the lines consumed.
    fn pipe_table_consumed(input: &[&str], pos: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(input, pos, &prefix);
        try_parse_pipe_table(&window, &mut builder, &ParserOptions::default())
    }

    /// Counts the nodes of the given kind anywhere in the tree parsed from `input`.
    fn count_nodes(input: &str, kind: SyntaxKind) -> usize {
        crate::parse(input, None)
            .descendants()
            .filter(|node| node.kind() == kind)
            .count()
    }

    #[test]
    fn test_separator_detection() {
        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
        // A single dash run is a horizontal rule, not a table separator.
        assert!(try_parse_table_separator("-------").is_none());
        assert!(try_parse_table_separator("--- --- ---").is_some());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(simple_table_consumed(&input, 0), Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // sep + 2 rows
        assert_eq!(simple_table_consumed(&input, 0), Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(try_parse_caption_prefix(caption).is_some());
        }
        // Just a colon with no content, and plain text, are not captions.
        for not_caption in [":", "Not a caption"] {
            assert!(try_parse_caption_prefix(not_caption).is_none());
        }
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let input = "Term\n: ```\n  code\n  ```\n";

        assert!(
            count_nodes(input, SyntaxKind::DEFINITION_LIST) > 0,
            "should parse as definition list"
        );
        assert!(
            count_nodes(input, SyntaxKind::CODE_BLOCK) > 0,
            "definition should preserve fenced code block"
        );
        assert!(
            count_nodes(input, SyntaxKind::TABLE_CAPTION) == 0,
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = concat!(
            "::: {#tbl:panel layout.nrow=\"1\"}\n",
            "  : My Caption {#tbl:foo-1}\n",
            "\n",
            "  | Col1 | Col2 | Col3 |\n",
            "  | ---- | ---- | ---- |\n",
            "  | A    | B    | C    |\n",
            "  | E    | F    | G    |\n",
            "  | A    | G    | G    |\n",
            "\n",
            "  : My Caption2 {#tbl:foo-2}\n",
            "\n",
            "  | Col1 | Col2 | Col3 |\n",
            "  | ---- | ---- | ---- |\n",
            "  | A    | B    | C    |\n",
            "  | E    | F    | G    |\n",
            "  | A    | G    | G    |\n",
            "\n",
            "Caption\n",
            ":::\n",
        );

        assert_eq!(
            count_nodes(input, SyntaxKind::TABLE_CAPTION),
            2,
            "expected both captions to attach to tables"
        );
        assert!(
            count_nodes(input, SyntaxKind::DEFINITION_LIST) == 0,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
            "Table: Demonstration of simple table syntax.",
            "",
        ];

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(simple_table_consumed(&input, 0), Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let input = [
            "Table: Demonstration of simple table syntax.",
            "",
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "    123     123       123          123",
            "",
        ];

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(simple_table_consumed(&input, 2), Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            ": Short caption",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(simple_table_consumed(&input, 0), Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let input = [
            "  Right     Left",
            "-------     ------",
            "     12     12",
            "",
            "Table: This is a longer caption",
            "that spans multiple lines.",
            "",
        ];

        // Should consume through end of multi-line caption
        assert_eq!(simple_table_consumed(&input, 0), Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let input = [
            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
            "--------------  ------------ ------- ---------------- ----------------- ------------",
            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
            "",
        ];

        assert_eq!(simple_table_consumed(&input, 0), Some(4));
    }

    // Pipe table tests
    #[test]
    fn test_pipe_separator_detection() {
        let valid = [
            "|------:|:-----|---------|:------:|",
            "|---|---|",
            "-----|-----:",     // No leading pipe
            "|-----+-------|", // Orgtbl variant
        ];
        for separator in valid {
            assert!(try_parse_pipe_separator(separator).is_some());
        }
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(aligns.len(), 4);
        assert_eq!(aligns[0], Alignment::Right);
        assert_eq!(aligns[1], Alignment::Left);
        assert_eq!(aligns[2], Alignment::Default);
        assert_eq!(aligns[3], Alignment::Center);
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let with_edges = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(with_edges.len(), 3);
        assert_eq!(with_edges[0], "Right");
        assert_eq!(with_edges[1], "Left");
        assert_eq!(with_edges[2], "Center");

        // Without leading/trailing pipes
        let without_edges = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(without_edges.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let input = [
            "",
            "| Right | Left | Center |",
            "|------:|:-----|:------:|",
            "|   12  |  12  |   12   |",
            "|  123  |  123 |  123   |",
            "",
        ];

        // header + sep + 2 rows
        assert_eq!(pipe_table_consumed(&input, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let input = [
            "",
            "fruit| price",
            "-----|-----:",
            "apple|2.05",
            "pear|1.37",
            "",
        ];

        assert_eq!(pipe_table_consumed(&input, 1), Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let input = [
            "",
            "| Col1 | Col2 |",
            "|------|------|",
            "| A    | B    |",
            "",
            "Table: My pipe table",
            "",
        ];

        // header + sep + row + blank + caption
        assert_eq!(pipe_table_consumed(&input, 1), Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let input = [
            ": (#tab:base) base R quoting",
            "functions",
            "",
            "| C | D |",
            "|---|---|",
            "| 3 | 4 |",
            "",
        ];

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(pipe_table_consumed(&input, 0), Some(6));
    }
}
1638
1639// ============================================================================
1640// Grid Table Parsing
1641// ============================================================================
1642
/// Check if a line is a grid table row separator.
///
/// A separator is indented by at most three characters, starts and ends
/// with `+`, and every non-empty segment between `+` signs is a run of a
/// single fill character (`-` or `=`), optionally wrapped in alignment
/// colons, e.g. `+---+:===:+`.
///
/// Returns one [`GridColumn`] per non-empty segment, or `None` when the line
/// is not a valid separator or has no columns.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    let body = line.trim_start();

    // Four or more leading characters of indentation make this a code block.
    if line.len() - body.len() > 3 {
        return None;
    }

    // Peel off the outer `+` on each side; a lone `+` has nothing left to
    // strip the second time and is rejected here.
    let interior = body.trim_end().strip_prefix('+')?.strip_suffix('+')?;

    let columns = interior
        .split('+')
        .filter(|segment| !segment.is_empty())
        .map(|segment| {
            // The fill is what remains once alignment colons are removed.
            let fill = segment.trim_matches(':');
            let fill_char = fill.chars().next().filter(|c| matches!(c, '-' | '='))?;

            // Every fill character must be the same as the first one.
            fill.chars().all(|c| c == fill_char).then(|| GridColumn {
                is_header_separator: fill_char == '=',
                width: segment.chars().count(),
            })
        })
        .collect::<Option<Vec<_>>>()?;

    (!columns.is_empty()).then_some(columns)
}

/// Column information for grid tables, derived from one separator segment.
#[derive(Debug, Clone)]
struct GridColumn {
    /// True when the segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the segment, alignment colons included.
    width: usize,
}
1717
1718fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1719    let mut end_byte = start_byte;
1720    let mut display_cols = 0usize;
1721
1722    for (offset, ch) in line[start_byte..].char_indices() {
1723        if ch == '|' {
1724            let sep_byte = start_byte + offset;
1725            return (sep_byte, sep_byte + 1);
1726        }
1727        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1728        if display_cols + ch_width > width {
1729            break;
1730        }
1731        display_cols += ch_width;
1732        end_byte = start_byte + offset + ch.len_utf8();
1733        if display_cols >= width {
1734            break;
1735        }
1736    }
1737
1738    // If the width budget is exhausted before seeing a separator (for example
1739    // because of padding/layout drift), advance to the next literal separator
1740    // to keep row slicing aligned and preserve losslessness.
1741    let mut sep_byte = end_byte;
1742    while sep_byte < line.len() {
1743        let mut chars = line[sep_byte..].chars();
1744        let Some(ch) = chars.next() else {
1745            break;
1746        };
1747        if ch == '|' {
1748            return (sep_byte, sep_byte + 1);
1749        }
1750        sep_byte += ch.len_utf8();
1751    }
1752
1753    (end_byte, end_byte)
1754}
1755
/// Report whether `line` has the shape of a grid table content row.
///
/// A content row is indented by at most three bytes of leading whitespace,
/// starts with `|`, and (ignoring trailing whitespace) ends with either `|`
/// for a normal row or `+` for a spanning-style continuation line.
fn is_grid_content_row(line: &str) -> bool {
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();
    let body = unindented.trim_end();

    indent <= 3
        && body.starts_with('|')
        && matches!(body.chars().next_back(), Some('|' | '+'))
}
1769
1770/// Extract cell contents from a single grid table row line.
1771/// Returns a vector of cell contents (trimmed) based on column boundaries.
1772/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1773fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1774    let (line_content, _) = strip_newline(line);
1775    let line_trimmed = line_content.trim();
1776
1777    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1778        return vec![String::new(); _columns.len()];
1779    }
1780
1781    let mut cells = Vec::with_capacity(_columns.len());
1782    let mut pos_byte = 1; // Skip leading pipe
1783
1784    for col in _columns {
1785        let col_idx = cells.len();
1786        if pos_byte >= line_trimmed.len() {
1787            cells.push(String::new());
1788            continue;
1789        }
1790
1791        let start_byte = pos_byte;
1792        let end_byte = if col_idx + 1 == _columns.len() {
1793            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1794        } else {
1795            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1796            pos_byte = next_start;
1797            end
1798        };
1799        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1800        if col_idx + 1 == _columns.len() {
1801            pos_byte = line_trimmed.len();
1802        }
1803    }
1804
1805    cells
1806}
1807
1808/// Emit a grid table row with inline-parsed cells.
1809/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1810/// then continuation lines as raw TEXT for losslessness.
1811fn emit_grid_table_row(
1812    builder: &mut GreenNodeBuilder<'static>,
1813    window: &StrippedLines<'_, '_>,
1814    indices: &[usize],
1815    columns: &[GridColumn],
1816    row_kind: SyntaxKind,
1817    config: &ParserOptions,
1818) {
1819    if indices.is_empty() {
1820        return;
1821    }
1822
1823    builder.start_node(row_kind.into());
1824
1825    // Emit first line with TABLE_CELL nodes. The continuation-line container
1826    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1827    // inside the row node before the cell text; the returned tail is the
1828    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
1829    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1830    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
1831    let cell_contents = extract_grid_cells_from_line(first_line, columns);
1832    let (line_without_newline, newline_str) = strip_newline(first_line);
1833    let trimmed = line_without_newline.trim();
1834    let expected_pipe_count = columns.len().saturating_add(1);
1835    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1836
1837    // Rows that don't contain all expected column separators (spanning-style rows)
1838    // must be emitted verbatim for losslessness. The first line's prefix was
1839    // already consumed above; emit its tail and each continuation tail.
1840    if actual_pipe_count != expected_pipe_count {
1841        emit_line_tokens(builder, first_line);
1842        for &idx in &indices[1..] {
1843            let tail = window.emit_or_dispatch_tail(builder, idx);
1844            emit_line_tokens(builder, tail);
1845        }
1846        builder.finish_node();
1847        return;
1848    }
1849
1850    // Emit leading whitespace
1851    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1852    if leading_ws_len > 0 {
1853        builder.token(
1854            SyntaxKind::WHITESPACE.into(),
1855            &line_without_newline[..leading_ws_len],
1856        );
1857    }
1858
1859    // Emit leading pipe
1860    if trimmed.starts_with('|') {
1861        builder.token(SyntaxKind::TEXT.into(), "|");
1862    }
1863
1864    // Emit each cell based on fixed column widths from separators
1865    let mut pos_byte = 1usize; // after leading pipe
1866    for (idx, cell_content) in cell_contents.iter().enumerate() {
1867        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1868            let start_byte = pos_byte;
1869            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1870                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1871            } else {
1872                let (end, next_start) =
1873                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1874                pos_byte = next_start;
1875                end
1876            };
1877            let slice = &trimmed[start_byte..end_byte];
1878            if idx + 1 == columns.len() {
1879                pos_byte = trimmed.len();
1880            }
1881            slice
1882        } else {
1883            ""
1884        };
1885
1886        // Emit leading whitespace in cell
1887        let cell_trimmed = part.trim();
1888        let ws_start_len = part.len() - part.trim_start().len();
1889        if ws_start_len > 0 {
1890            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1891        }
1892
1893        // Emit TABLE_CELL with inline parsing
1894        emit_table_cell(builder, cell_content, config);
1895
1896        // Emit trailing whitespace in cell
1897        let ws_end_start = ws_start_len + cell_trimmed.len();
1898        if ws_end_start < part.len() {
1899            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1900        }
1901
1902        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1903        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1904            builder.token(SyntaxKind::TEXT.into(), "|");
1905        }
1906    }
1907
1908    // Emit trailing whitespace before newline
1909    let trailing_ws_start = leading_ws_len + trimmed.len();
1910    if trailing_ws_start < line_without_newline.len() {
1911        builder.token(
1912            SyntaxKind::WHITESPACE.into(),
1913            &line_without_newline[trailing_ws_start..],
1914        );
1915    }
1916
1917    // Emit newline
1918    if !newline_str.is_empty() {
1919        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1920    }
1921
1922    // Emit continuation lines as TEXT for losslessness, re-emitting each
1923    // line's container prefix first.
1924    for &idx in &indices[1..] {
1925        let tail = window.emit_or_dispatch_tail(builder, idx);
1926        emit_line_tokens(builder, tail);
1927    }
1928
1929    builder.finish_node();
1930}
1931
1932/// Try to parse a grid table starting at the given position.
1933/// Returns the number of lines consumed if successful.
1934pub(crate) fn try_parse_grid_table(
1935    window: &StrippedLines<'_, '_>,
1936    builder: &mut GreenNodeBuilder<'static>,
1937    config: &ParserOptions,
1938) -> Option<usize> {
1939    let lines = window.raw();
1940    let start_pos = window.pos();
1941    if start_pos >= lines.len() {
1942        return None;
1943    }
1944
1945    // Detection scans run against the container-prefix-stripped view so a
1946    // grid table nested in `list → blockquote` (e.g. `- > +---+---+`) has its
1947    // `  > ` prefix removed before the separator/content-row shape checks.
1948    // With an empty prefix `stripped == lines`. Emission re-emits the prefix
1949    // bytes as tokens via the window; captions/blank lines read raw `lines`.
1950    let stripped = window.strip_all();
1951
1952    // Check if this line is a caption followed by a table
1953    // If so, the actual table starts after the caption and blank line
1954    let (actual_start, caption_before) = if is_caption_followed_by_table(&stripped, start_pos) {
1955        let (cap_start, cap_end) = caption_range_starting_at(&stripped, start_pos)?;
1956        let mut pos = cap_end;
1957        while pos < stripped.len() && stripped[pos].trim().is_empty() {
1958            pos += 1;
1959        }
1960        (pos, Some((cap_start, cap_end)))
1961    } else {
1962        (start_pos, None)
1963    };
1964
1965    if actual_start >= lines.len() {
1966        return None;
1967    }
1968
1969    // First line must be a grid separator
1970    let first_line = stripped[actual_start];
1971    let _columns = try_parse_grid_separator(first_line)?;
1972
1973    // Track table structure
1974    let mut end_pos = actual_start + 1;
1975    let mut found_header_sep = false;
1976    let mut in_footer = false;
1977
1978    // Scan table lines
1979    while end_pos < lines.len() {
1980        let line = stripped[end_pos];
1981
1982        // Check for blank line (table ends)
1983        if line.trim().is_empty() {
1984            break;
1985        }
1986
1987        // Check for separator line
1988        if let Some(sep_cols) = try_parse_grid_separator(line) {
1989            // Check if this is a header separator (=)
1990            if sep_cols.iter().any(|c| c.is_header_separator) {
1991                if !found_header_sep {
1992                    found_header_sep = true;
1993                } else if !in_footer {
1994                    // Second = separator starts footer
1995                    in_footer = true;
1996                }
1997            }
1998            end_pos += 1;
1999            continue;
2000        }
2001
2002        // Check for content row
2003        if is_grid_content_row(line) {
2004            end_pos += 1;
2005            continue;
2006        }
2007
2008        // Not a valid grid table line - table ends
2009        break;
2010    }
2011
2012    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2013    // Or just top + content rows that end with a separator
2014    if end_pos <= actual_start + 1 {
2015        return None;
2016    }
2017
2018    // Last consumed line should be a separator for a well-formed table
2019    // But we'll be lenient and accept tables ending with content rows
2020
2021    // Check for caption before table (only if we didn't already detected it)
2022    let caption_before =
2023        caption_before.or_else(|| find_caption_before_table(&stripped, actual_start));
2024
2025    // Check for caption after table
2026    let caption_after = if caption_before.is_some() {
2027        None
2028    } else {
2029        find_caption_after_table(&stripped, end_pos)
2030    };
2031
2032    // Build the grid table
2033    builder.start_node(SyntaxKind::GRID_TABLE.into());
2034
2035    // Emit caption before if present
2036    if let Some((cap_start, cap_end)) = caption_before {
2037        emit_table_caption(builder, lines, cap_start, cap_end, config);
2038        // Emit blank line between caption and table if present
2039        if cap_end < actual_start {
2040            for line in lines.iter().take(actual_start).skip(cap_end) {
2041                if line.trim().is_empty() {
2042                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2043                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2044                    builder.finish_node();
2045                }
2046            }
2047        }
2048    }
2049
2050    // Track whether we've passed the header separator
2051    let mut past_header_sep = false;
2052    let mut in_footer_section = false;
2053    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2054    // each line's container prefix can be re-emitted via the window.
2055    let mut current_row_indices: Vec<usize> = Vec::new();
2056    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2057
2058    // Emit table rows - accumulate multi-line cells
2059    for (idx, &line) in stripped.iter().enumerate().take(end_pos).skip(actual_start) {
2060        if let Some(sep_cols) = try_parse_grid_separator(line) {
2061            // Separator line - emit any accumulated row first
2062            if !current_row_indices.is_empty() {
2063                emit_grid_table_row(
2064                    builder,
2065                    window,
2066                    &current_row_indices,
2067                    &sep_cols,
2068                    current_row_kind,
2069                    config,
2070                );
2071                current_row_indices.clear();
2072            }
2073
2074            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2075
2076            // Re-emit any continuation-line container prefix (`  > `) as
2077            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2078            if is_header_sep {
2079                if !past_header_sep {
2080                    // This is the header/body separator
2081                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2082                    let tail = window.emit_or_dispatch_tail(builder, idx);
2083                    emit_line_tokens(builder, tail);
2084                    builder.finish_node();
2085                    past_header_sep = true;
2086                } else {
2087                    // Footer separator
2088                    if !in_footer_section {
2089                        in_footer_section = true;
2090                    }
2091                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2092                    let tail = window.emit_or_dispatch_tail(builder, idx);
2093                    emit_line_tokens(builder, tail);
2094                    builder.finish_node();
2095                }
2096            } else {
2097                // Regular separator (row boundary)
2098                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2099                let tail = window.emit_or_dispatch_tail(builder, idx);
2100                emit_line_tokens(builder, tail);
2101                builder.finish_node();
2102            }
2103        } else if is_grid_content_row(line) {
2104            // Content row - accumulate for multi-line cells
2105            current_row_kind = if !past_header_sep && found_header_sep {
2106                SyntaxKind::TABLE_HEADER
2107            } else if in_footer_section {
2108                SyntaxKind::TABLE_FOOTER
2109            } else {
2110                SyntaxKind::TABLE_ROW
2111            };
2112
2113            current_row_indices.push(idx);
2114        }
2115    }
2116
2117    // Emit any remaining accumulated row
2118    if !current_row_indices.is_empty() {
2119        // Use first separator's columns for cell boundaries
2120        if let Some(sep_cols) = try_parse_grid_separator(stripped[actual_start]) {
2121            emit_grid_table_row(
2122                builder,
2123                window,
2124                &current_row_indices,
2125                &sep_cols,
2126                current_row_kind,
2127                config,
2128            );
2129        }
2130    }
2131
2132    // Emit caption after if present
2133    if let Some((cap_start, cap_end)) = caption_after {
2134        if cap_start > end_pos {
2135            for line in lines.iter().take(cap_start).skip(end_pos) {
2136                if line.trim().is_empty() {
2137                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2138                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2139                    builder.finish_node();
2140                }
2141            }
2142        }
2143        emit_table_caption(builder, lines, cap_start, cap_end, config);
2144    }
2145
2146    builder.finish_node(); // GRID_TABLE
2147
2148    // Calculate lines consumed
2149    let table_start = caption_before
2150        .map(|(start, _)| start)
2151        .unwrap_or(actual_start);
2152    let table_end = if let Some((_, cap_end)) = caption_after {
2153        cap_end
2154    } else {
2155        end_pos
2156    };
2157
2158    Some(table_end - table_start)
2159}
2160
#[cfg(test)]
mod grid_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Run the grid table parser over `input` with the window positioned at
    /// `start` and no container prefix, returning the consumed line count.
    fn consumed_lines(input: &[&str], start: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(input, start, &prefix);
        try_parse_grid_table(&window, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let separators = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in separators {
            assert!(try_parse_grid_separator(line).is_some());
        }

        let non_separators = [
            "not a separator",
            "|---|---|", // pipe table sep
        ];
        for line in non_separators {
            assert!(try_parse_grid_separator(line).is_none());
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|c| c.is_header_separator));

        let row_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(row_cols.iter().all(|c| !c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        assert!(is_grid_content_row("| content | content |"));
        assert!(is_grid_content_row("|  |  |"));
        assert!(is_grid_content_row("| content +------+"));
        assert!(!is_grid_content_row("+---+---+")); // separator, not content
        assert!(!is_grid_content_row("no pipes here"));
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Should include caption + blank + table
        assert_eq!(consumed_lines(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(&input, 0), Some(7));
    }
}
2328
2329// ============================================================================
2330// Multiline Table Parsing
2331// ============================================================================
2332
2333/// Check if a line is a multiline table separator (continuous dashes).
2334/// Multiline table separators span the full width and are all dashes.
2335/// Returns Some(columns) if valid, None otherwise.
2336fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2337    let trimmed = line.trim_start();
2338    let leading_spaces = line.len() - trimmed.len();
2339
2340    // Must have leading spaces <= 3 to not be a code block
2341    if leading_spaces > 3 {
2342        return None;
2343    }
2344
2345    let trimmed = trimmed.trim_end();
2346
2347    // Must be all dashes (continuous line of dashes)
2348    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2349        return None;
2350    }
2351
2352    // Must have at least 3 dashes
2353    if trimmed.len() < 3 {
2354        return None;
2355    }
2356
2357    // This is a full-width separator - columns will be determined by column separator lines
2358    Some(vec![Column {
2359        start: leading_spaces,
2360        end: leading_spaces + trimmed.len(),
2361        alignment: Alignment::Default,
2362    }])
2363}
2364
2365/// Check if a line is a column separator line for multiline tables.
2366/// Column separators have dashes with spaces between them to define columns.
2367fn is_column_separator(line: &str) -> bool {
2368    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2369}
2370
2371fn is_headerless_single_row_without_blank(
2372    lines: &[&str],
2373    row_start: usize,
2374    row_end: usize,
2375    columns: &[Column],
2376) -> bool {
2377    if row_start >= row_end {
2378        return false;
2379    }
2380
2381    if row_end - row_start == 1 {
2382        return false;
2383    }
2384
2385    let Some(last_col) = columns.last() else {
2386        return false;
2387    };
2388
2389    for line in lines.iter().take(row_end).skip(row_start + 1) {
2390        let (content, _) = strip_newline(line);
2391        let prefix_end = last_col.start.min(content.len());
2392        if !content[..prefix_end].trim().is_empty() {
2393            return false;
2394        }
2395    }
2396
2397    true
2398}
2399
2400/// Try to parse a multiline table starting at the given position.
2401/// Returns the number of lines consumed if successful.
2402pub(crate) fn try_parse_multiline_table(
2403    window: &StrippedLines<'_, '_>,
2404    builder: &mut GreenNodeBuilder<'static>,
2405    config: &ParserOptions,
2406) -> Option<usize> {
2407    let lines = window.raw();
2408    let start_pos = window.pos();
2409    if start_pos >= lines.len() {
2410        return None;
2411    }
2412
2413    // Detection scans run against the container-prefix-stripped view so a
2414    // multiline table nested in `list → blockquote` (e.g. `- > ----`) has its
2415    // `  > ` prefix removed before the separator/blank-row shape checks. The
2416    // interior `>`-only row then strips to `""` and registers as a blank row
2417    // separator. With an empty prefix `stripped == lines`. Emission re-emits
2418    // the prefix bytes as tokens via the window; captions read raw `lines`.
2419    let stripped = window.strip_all();
2420
2421    let first_line = stripped[start_pos];
2422
2423    // First line can be either:
2424    // 1. A full-width dash separator (for tables with headers)
2425    // 2. A column separator (for headerless tables)
2426    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2427    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2428    let headerless_columns = if is_column_sep_start {
2429        try_parse_table_separator(first_line)
2430    } else {
2431        None
2432    };
2433
2434    if !is_full_width_start && !is_column_sep_start {
2435        return None;
2436    }
2437
2438    // Look ahead to find the structure
2439    let mut pos = start_pos + 1;
2440    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2441    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2442    let mut has_header = false;
2443    let mut found_blank_line = false;
2444    let mut found_closing_sep = false;
2445    let mut content_line_count = 0usize;
2446
2447    // Scan for header section and column separator
2448    while pos < lines.len() {
2449        let line = stripped[pos];
2450
2451        // Check for column separator (defines columns) - only if we started with full-width
2452        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2453            found_column_sep = true;
2454            column_sep_pos = pos;
2455            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2456            pos += 1;
2457            continue;
2458        }
2459
2460        // Check for blank line (row separator in body)
2461        if line.trim().is_empty() {
2462            found_blank_line = true;
2463            pos += 1;
2464            // Check if next line is a valid closing separator for this table shape.
2465            if pos < lines.len() {
2466                let next = stripped[pos];
2467                let is_valid_closer = if is_full_width_start {
2468                    try_parse_multiline_separator(next).is_some()
2469                } else {
2470                    is_column_separator(next)
2471                };
2472                if is_valid_closer {
2473                    found_closing_sep = true;
2474                    pos += 1; // Include the closing separator
2475                    break;
2476                }
2477            }
2478            continue;
2479        }
2480
2481        // Check for closing full-width dashes (only for full-width-start tables).
2482        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2483            found_closing_sep = true;
2484            pos += 1;
2485            break;
2486        }
2487
2488        // Check for closing column separator (for headerless tables)
2489        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2490            found_closing_sep = true;
2491            pos += 1;
2492            break;
2493        }
2494
2495        // Content row
2496        content_line_count += 1;
2497        pos += 1;
2498    }
2499
2500    // Must have found a column separator to be a valid multiline table
2501    if !found_column_sep {
2502        return None;
2503    }
2504
2505    // Must have had at least one blank line between rows (distinguishes from simple tables)
2506    if !found_blank_line {
2507        if !is_column_sep_start {
2508            return None;
2509        }
2510        let columns = headerless_columns.as_deref()?;
2511        if !is_headerless_single_row_without_blank(&stripped, start_pos + 1, pos - 1, columns) {
2512            return None;
2513        }
2514    }
2515
2516    // Must have a closing separator
2517    if !found_closing_sep {
2518        return None;
2519    }
2520
2521    // Must have consumed more than just the opening separator
2522    if pos <= start_pos + 2 {
2523        return None;
2524    }
2525
2526    let end_pos = pos;
2527
2528    // Extract column boundaries from the separator line
2529    let columns = try_parse_table_separator(stripped[column_sep_pos])
2530        .expect("Column separator must be valid");
2531
2532    // Check for caption before table
2533    let caption_before = find_caption_before_table(&stripped, start_pos);
2534
2535    // Check for caption after table
2536    let caption_after = if caption_before.is_some() {
2537        None
2538    } else {
2539        find_caption_after_table(&stripped, end_pos)
2540    };
2541
2542    // Build the multiline table
2543    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2544
2545    // Emit caption before if present
2546    if let Some((cap_start, cap_end)) = caption_before {
2547        emit_table_caption(builder, lines, cap_start, cap_end, config);
2548
2549        // Emit blank line between caption and table if present
2550        if cap_end < start_pos {
2551            for line in lines.iter().take(start_pos).skip(cap_end) {
2552                if line.trim().is_empty() {
2553                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2554                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2555                    builder.finish_node();
2556                }
2557            }
2558        }
2559    }
2560
2561    // Emit opening separator. The dispatch line's prefix was already consumed
2562    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
2563    // re-emits its `  > ` prefix via `emit_prefix_at`.
2564    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2565    let tail = window.emit_or_dispatch_tail(builder, start_pos);
2566    emit_line_tokens(builder, tail);
2567    builder.finish_node();
2568
2569    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
2570    // up a multi-line row so each line's container prefix can be re-emitted via
2571    // the window.
2572    let mut in_header = has_header;
2573    let mut current_row_indices: Vec<usize> = Vec::new();
2574
2575    for (i, &line) in stripped
2576        .iter()
2577        .enumerate()
2578        .take(end_pos)
2579        .skip(start_pos + 1)
2580    {
2581        // Column separator (header/body divider)
2582        if i == column_sep_pos {
2583            // Emit any accumulated header lines
2584            if !current_row_indices.is_empty() {
2585                emit_multiline_table_row(
2586                    builder,
2587                    window,
2588                    &current_row_indices,
2589                    &columns,
2590                    SyntaxKind::TABLE_HEADER,
2591                    config,
2592                );
2593                current_row_indices.clear();
2594            }
2595
2596            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2597            let tail = window.emit_or_dispatch_tail(builder, i);
2598            emit_line_tokens(builder, tail);
2599            builder.finish_node();
2600            in_header = false;
2601            continue;
2602        }
2603
2604        // Closing separator (full-width or column separator at end)
2605        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2606            // Emit any accumulated row lines
2607            if !current_row_indices.is_empty() {
2608                let kind = if in_header {
2609                    SyntaxKind::TABLE_HEADER
2610                } else {
2611                    SyntaxKind::TABLE_ROW
2612                };
2613                emit_multiline_table_row(
2614                    builder,
2615                    window,
2616                    &current_row_indices,
2617                    &columns,
2618                    kind,
2619                    config,
2620                );
2621                current_row_indices.clear();
2622            }
2623
2624            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2625            let tail = window.emit_or_dispatch_tail(builder, i);
2626            emit_line_tokens(builder, tail);
2627            builder.finish_node();
2628            continue;
2629        }
2630
2631        // Blank line (row separator)
2632        if line.trim().is_empty() {
2633            // Emit accumulated row
2634            if !current_row_indices.is_empty() {
2635                let kind = if in_header {
2636                    SyntaxKind::TABLE_HEADER
2637                } else {
2638                    SyntaxKind::TABLE_ROW
2639                };
2640                emit_multiline_table_row(
2641                    builder,
2642                    window,
2643                    &current_row_indices,
2644                    &columns,
2645                    kind,
2646                    config,
2647                );
2648                current_row_indices.clear();
2649            }
2650
2651            // Re-emit the interior `>`-only separator row's container prefix
2652            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
2653            builder.start_node(SyntaxKind::BLANK_LINE.into());
2654            let tail = window.emit_or_dispatch_tail(builder, i);
2655            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
2656            builder.finish_node();
2657            continue;
2658        }
2659
2660        // Content line - accumulate for current row
2661        current_row_indices.push(i);
2662    }
2663
2664    // Emit any remaining accumulated lines
2665    if !current_row_indices.is_empty() {
2666        let kind = if in_header {
2667            SyntaxKind::TABLE_HEADER
2668        } else {
2669            SyntaxKind::TABLE_ROW
2670        };
2671        emit_multiline_table_row(
2672            builder,
2673            window,
2674            &current_row_indices,
2675            &columns,
2676            kind,
2677            config,
2678        );
2679    }
2680
2681    // Emit caption after if present
2682    if let Some((cap_start, cap_end)) = caption_after {
2683        if cap_start > end_pos {
2684            for line in lines.iter().take(cap_start).skip(end_pos) {
2685                if line.trim().is_empty() {
2686                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2687                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2688                    builder.finish_node();
2689                }
2690            }
2691        }
2692        emit_table_caption(builder, lines, cap_start, cap_end, config);
2693    }
2694
2695    builder.finish_node(); // MultilineTable
2696
2697    // Calculate lines consumed
2698    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2699    let table_end = if let Some((_, cap_end)) = caption_after {
2700        cap_end
2701    } else {
2702        end_pos
2703    };
2704
2705    Some(table_end - table_start)
2706}
2707
2708/// Extract cell contents from first line only (for CST emission).
2709/// Multi-line content will be in continuation TEXT tokens.
2710fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2711    let (line_content, _) = strip_newline(line);
2712    let mut cells = Vec::new();
2713
2714    for column in columns.iter() {
2715        let column_start = column_offset_to_byte_index(line_content, column.start);
2716        let column_end = column_offset_to_byte_index(line_content, column.end);
2717
2718        // Extract FULL text for this column (including whitespace)
2719        let cell_text = if column_start < column_end {
2720            &line_content[column_start..column_end]
2721        } else if column_start < line_content.len() {
2722            &line_content[column_start..]
2723        } else {
2724            ""
2725        };
2726
2727        cells.push(cell_text.to_string());
2728    }
2729
2730    cells
2731}
2732
2733/// Emit a multiline table row with inline parsing (Phase 7.1).
2734///
2735/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2736/// physical line re-emits its container prefix (`  > `) via the window before
2737/// its content. With an empty prefix the tails equal the raw lines, so emission
2738/// is byte-identical to the pre-window path.
2739fn emit_multiline_table_row(
2740    builder: &mut GreenNodeBuilder<'static>,
2741    window: &StrippedLines<'_, '_>,
2742    indices: &[usize],
2743    columns: &[Column],
2744    kind: SyntaxKind,
2745    config: &ParserOptions,
2746) {
2747    if indices.is_empty() {
2748        return;
2749    }
2750
2751    builder.start_node(kind.into());
2752
2753    // Emit the first line's container prefix as tokens, then slice cells from
2754    // the prefix-stripped tail (for CST losslessness, only the first physical
2755    // line is parsed into cells; continuation lines stay verbatim TEXT).
2756    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2757    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2758    let (trimmed, newline_str) = strip_newline(first_line);
2759    let mut current_pos = 0;
2760
2761    for (col_idx, column) in columns.iter().enumerate() {
2762        let cell_text = &cell_contents[col_idx];
2763        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2764        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2765
2766        // Emit whitespace before cell
2767        if current_pos < cell_start {
2768            builder.token(
2769                SyntaxKind::WHITESPACE.into(),
2770                &trimmed[current_pos..cell_start],
2771            );
2772        }
2773
2774        // Emit cell with inline parsing (first line content only)
2775        emit_table_cell(builder, cell_text, config);
2776
2777        current_pos = cell_end;
2778    }
2779
2780    // Emit trailing whitespace
2781    if current_pos < trimmed.len() {
2782        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2783    }
2784
2785    // Emit newline
2786    if !newline_str.is_empty() {
2787        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2788    }
2789
2790    // Emit continuation lines as TEXT to preserve exact line structure,
2791    // re-emitting each line's container prefix first.
2792    for &idx in &indices[1..] {
2793        let tail = window.emit_or_dispatch_tail(builder, idx);
2794        emit_line_tokens(builder, tail);
2795    }
2796
2797    builder.finish_node();
2798}
2799
#[cfg(test)]
mod multiline_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Run `try_parse_multiline_table` on `input`, starting at line 0 with a
    /// default (empty) container prefix and default parser options. Returns
    /// the parser's result: the number of lines consumed, or `None`.
    fn parse_lines(input: Vec<&str>) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let options = ParserOptions::default();
        let window = StrippedLines::new(&input, 0, &prefix);
        try_parse_multiline_table(&window, &mut builder, &options)
    }

    /// Emit `text` through `emit_table_cell` and return the resulting tree root.
    fn build_cell(text: &str) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        emit_table_cell(&mut builder, text, &ParserOptions::default());
        SyntaxNode::new_root(builder.finish())
    }

    /// Assert that `text` becomes a TABLE_CELL that round-trips its text and
    /// has exactly one child node, of kind `expected`.
    fn assert_single_child_node(text: &str, expected: SyntaxKind) {
        let cell = build_cell(text);
        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), text);

        let child_nodes: Vec<_> = cell.children().collect();
        assert_eq!(child_nodes.len(), 1);
        assert_eq!(child_nodes[0].kind(), expected);
    }

    #[test]
    fn test_multiline_separator_detection() {
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----", // with leading spaces
        ];
        for &line in &accepted {
            assert!(
                try_parse_multiline_separator(line).is_some(),
                "should be a separator: {:?}",
                line
            );
        }

        let rejected = [
            "--",      // too short
            "--- ---", // has spaces
            "+---+",   // grid separator
        ];
        for &line in &rejected {
            assert!(
                try_parse_multiline_separator(line).is_none(),
                "should not be a separator: {:?}",
                line
            );
        }
    }

    #[test]
    fn test_basic_multiline_table() {
        let consumed = parse_lines(vec![
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        let consumed = parse_lines(vec![
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        let consumed = parse_lines(vec![
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ]);

        assert!(consumed.is_none());
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        let consumed = parse_lines(vec![
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let consumed = parse_lines(vec![
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ]);

        // table (6 lines) + blank + caption
        assert_eq!(consumed, Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let consumed = parse_lines(vec![
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        let consumed = parse_lines(vec![
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ]);

        assert!(consumed.is_none());
    }

    #[test]
    fn test_not_multiline_table() {
        // A simple table must not be parsed as multiline: its first line is
        // not a full-width separator.
        let consumed = parse_lines(vec![
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ]);

        assert!(consumed.is_none());
    }

    // Unit tests for the emit_table_cell() helper.

    #[test]
    fn test_emit_table_cell_plain_text() {
        let cell = build_cell("Cell");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "Cell");

        // Exactly one child element, and it is a TEXT token.
        let kinds: Vec<_> = cell.children_with_tokens().map(|el| el.kind()).collect();
        assert_eq!(kinds.len(), 1);
        assert_eq!(kinds[0], SyntaxKind::TEXT);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        // Should have a single EMPHASIS child
        assert_single_child_node("*italic*", SyntaxKind::EMPHASIS);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        // Should have a single INLINE_CODE child
        assert_single_child_node("`code`", SyntaxKind::INLINE_CODE);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        // Should have a single LINK child
        assert_single_child_node("[text](url)", SyntaxKind::LINK);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        // Should have a single STRONG child
        assert_single_child_node("**bold**", SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let cell = build_cell("Text **bold** and `code`");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "Text **bold** and `code`");

        // Expect at least: TEXT, STRONG, TEXT, INLINE_CODE
        let kinds: Vec<_> = cell.children_with_tokens().map(|el| el.kind()).collect();
        assert!(kinds.len() >= 4);

        // Spot-check the leading elements
        assert_eq!(kinds[0], SyntaxKind::TEXT);
        assert_eq!(kinds[1], SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let cell = build_cell("");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "");

        // An empty cell has no children at all
        assert_eq!(cell.children_with_tokens().count(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let cell = build_cell(r"A \| B");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        // The escaped pipe should be preserved
        assert_eq!(cell.text(), r"A \| B");
    }
}