Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Horizontal alignment assigned to a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Column content is left-aligned.
    Left,
    /// Column content is right-aligned.
    Right,
    /// Column content is centered.
    Center,
    /// No explicit alignment was specified or could be determined.
    Default,
}
23
/// Column information extracted from the separator line.
///
/// One `Column` is produced per run of dashes in the separator.
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index) in the line: the first dash of the run,
    /// including any leading indentation of the separator line.
    start: usize,
    /// End position (byte index) in the line, exclusive: one past the last
    /// dash of the run.
    end: usize,
    /// Column alignment
    alignment: Alignment,
}
34
35/// Try to detect if a line is a table separator line.
36/// Returns Some(column positions) if it's a valid separator.
37pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
38    let trimmed = line.trim_start();
39    // Strip trailing newline if present (CRLF or LF)
40    let (trimmed, newline_str) = strip_newline(trimmed);
41    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
42
43    // Must have leading spaces <= 3 to not be a code block
44    if leading_spaces > 3 {
45        return None;
46    }
47
48    // Simple tables only use dashed separators.
49    if trimmed.contains('*') || trimmed.contains('_') {
50        return None;
51    }
52
53    // Must contain at least one dash
54    if !trimmed.contains('-') {
55        return None;
56    }
57
58    // A separator line consists of dashes and spaces
59    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
60        return None;
61    }
62
63    // Must not be a horizontal rule.
64    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
65    if dash_groups.len() <= 1 {
66        return None;
67    }
68
69    // Extract column positions from dash groups
70    let columns = extract_columns(trimmed, leading_spaces);
71
72    if columns.is_empty() {
73        return None;
74    }
75
76    Some(columns)
77}
78
79/// Extract column positions from a separator line.
80fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
81    let mut columns = Vec::new();
82    let mut in_dashes = false;
83    let mut col_start = 0;
84
85    for (i, ch) in separator.char_indices() {
86        match ch {
87            '-' if !in_dashes => {
88                col_start = i + offset;
89                in_dashes = true;
90            }
91            ' ' if in_dashes => {
92                columns.push(Column {
93                    start: col_start,
94                    end: i + offset,
95                    alignment: Alignment::Default, // Will be determined later
96                });
97                in_dashes = false;
98            }
99            _ => {}
100        }
101    }
102
103    // Handle last column
104    if in_dashes {
105        columns.push(Column {
106            start: col_start,
107            end: separator.len() + offset,
108            alignment: Alignment::Default,
109        });
110    }
111
112    columns
113}
114
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Separator lines are ASCII, so their column offsets double as character
/// counts; data rows may contain multibyte characters and need this remapping
/// before they can be sliced. Offsets at or past the end of `line` map to
/// `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
125
/// Split a table-caption marker off the front of `line`.
///
/// Accepted markers are `Table:`, `table:`, and a bare `:` that is followed
/// by whitespace, each preceded by at most three bytes of leading whitespace.
/// Returns `Some((prefix_len, rest))` where `prefix_len` is the byte length of
/// the indentation plus the marker and `rest` is everything after the marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation belongs to a code block, not a caption.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows (Pandoc-style); this
    // keeps fenced-div fences such as ":::" from being read as captions.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
154
/// Check if a line could be the start of a table caption.
///
/// True exactly when `try_parse_caption_prefix` recognizes a caption marker
/// (`Table:`, `table:`, or `:` followed by whitespace) on the line.
fn is_table_caption_start(line: &str) -> bool {
    try_parse_caption_prefix(line).is_some()
}
159
/// Report whether `line`, ignoring leading whitespace, begins with exactly
/// one colon, i.e. a `:` that is not immediately followed by another `:`.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
164
165fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
166    let Some((_, rest)) = try_parse_caption_prefix(line) else {
167        return false;
168    };
169    let trimmed = rest.trim_start();
170    trimmed.starts_with("```") || trimmed.starts_with("~~~")
171}
172
/// Report whether `line` looks like a fenced-div fence: optional leading
/// whitespace, at least three colons, and then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    let colon_count = body.len() - after_colons.len();

    colon_count >= 3
        && after_colons
            .chars()
            .next()
            .map_or(true, char::is_whitespace)
}
182
183fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
184    if !is_table_caption_start(lines[pos]) {
185        return false;
186    }
187
188    if is_bare_colon_caption_start(lines[pos])
189        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
190    {
191        return false;
192    }
193
194    // Avoid stealing definition-list definitions (":   ...") as table captions.
195    if is_bare_colon_caption_start(lines[pos])
196        && pos > 0
197        && !lines[pos - 1].trim().is_empty()
198        && !line_is_fenced_div_fence(lines[pos - 1])
199    {
200        return false;
201    }
202    true
203}
204
/// Check if a line could be the start of a grid table.
/// Grid tables start with a separator line like +---+---+ or +===+===+
///
/// True exactly when `try_parse_grid_separator` accepts the line.
fn is_grid_table_start(line: &str) -> bool {
    try_parse_grid_separator(line).is_some()
}
210
/// Check if a line could be the start of a multiline table.
/// Multiline tables start with either:
/// - A full-width dash separator (----)
/// - A column separator with dashes and spaces (---- ---- ----)
///
/// True when either `try_parse_multiline_separator` or `is_column_separator`
/// accepts the line.
fn is_multiline_table_start(line: &str) -> bool {
    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
}
218
219/// Check if there's a table following a potential caption at this position.
220/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
221pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
222    if caption_pos >= lines.len() {
223        return false;
224    }
225
226    // Caption must start with a caption prefix
227    if !is_valid_caption_start_before_table(lines, caption_pos) {
228        return false;
229    }
230
231    let mut pos = caption_pos + 1;
232
233    // Skip continuation lines of caption (non-blank lines).
234    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
235    // must not be folded into the caption.
236    while pos < lines.len()
237        && !lines[pos].trim().is_empty()
238        && !line_is_fenced_div_fence(lines[pos])
239    {
240        // If we hit a table separator, we found a table
241        if try_parse_table_separator(lines[pos]).is_some() {
242            return true;
243        }
244        pos += 1;
245    }
246
247    // Skip one blank line
248    if pos < lines.len() && lines[pos].trim().is_empty() {
249        pos += 1;
250    }
251
252    // Check for table at next position
253    if pos < lines.len() {
254        let line = lines[pos];
255
256        // Check for grid table start (+---+---+ or +===+===+)
257        if is_grid_table_start(line) {
258            return true;
259        }
260
261        // Check for multiline table start (---- or ---- ---- ----)
262        if is_multiline_table_start(line) {
263            return true;
264        }
265
266        // Could be a separator line (simple/pipe table, headerless)
267        if try_parse_table_separator(line).is_some() {
268            return true;
269        }
270
271        // Or could be a header line followed by separator (simple/pipe table with header)
272        if pos + 1 < lines.len() && !line.trim().is_empty() {
273            let next_line = lines[pos + 1];
274            if try_parse_table_separator(next_line).is_some()
275                || try_parse_pipe_separator(next_line).is_some()
276            {
277                return true;
278            }
279        }
280    }
281
282    false
283}
284
285fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
286    if start >= lines.len() || !is_table_caption_start(lines[start]) {
287        return None;
288    }
289    let mut end = start + 1;
290    while end < lines.len()
291        && !lines[end].trim().is_empty()
292        && !line_is_fenced_div_fence(lines[end])
293    {
294        end += 1;
295    }
296    Some((start, end))
297}
298
299/// Find caption before table (if any).
300/// Returns (caption_start, caption_end) positions, or None.
301fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
302    if table_start == 0 {
303        return None;
304    }
305
306    // Look backward for a caption
307    // Caption must be immediately before table (with possible blank line between)
308    let mut pos = table_start - 1;
309
310    // Skip one blank line if present
311    if lines[pos].trim().is_empty() {
312        if pos == 0 {
313            return None;
314        }
315        pos -= 1;
316    }
317
318    // Now pos points to the last non-blank line before the table
319    // This could be the last line of a multiline caption, or a single-line caption
320    let caption_end = pos + 1; // End is exclusive
321
322    // If this line is NOT a caption start, it might be a continuation line
323    // Scan backward through non-blank lines to find the caption start
324    if !is_valid_caption_start_before_table(lines, pos) {
325        // Not a caption start - check if there's a caption start above
326        let mut scan_pos = pos;
327        while scan_pos > 0 {
328            scan_pos -= 1;
329            let line = lines[scan_pos];
330
331            // If we hit a blank line or fenced-div fence, we've gone too far
332            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
333                return None;
334            }
335
336            // If we find a caption start, this is the beginning of the multiline caption
337            if is_valid_caption_start_before_table(lines, scan_pos) {
338                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
339                    return None;
340                }
341                if previous_nonblank_looks_like_table(lines, scan_pos) {
342                    return None;
343                }
344                return Some((scan_pos, caption_end));
345            }
346        }
347        // Scanned to beginning without finding caption start
348        None
349    } else {
350        if pos > 0 && !lines[pos - 1].trim().is_empty() {
351            return None;
352        }
353        if previous_nonblank_looks_like_table(lines, pos) {
354            return None;
355        }
356        // This line is a caption start - return the range
357        Some((pos, caption_end))
358    }
359}
360
361fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
362    if pos == 0 {
363        return false;
364    }
365    let mut i = pos;
366    while i > 0 {
367        i -= 1;
368        let line = lines[i].trim();
369        if line.is_empty() {
370            continue;
371        }
372        return line_looks_like_table_syntax(line);
373    }
374    false
375}
376
377fn line_looks_like_table_syntax(line: &str) -> bool {
378    if line.starts_with('|') && line.matches('|').count() >= 2 {
379        return true;
380    }
381    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
382        return true;
383    }
384    try_parse_table_separator(line).is_some()
385        || try_parse_pipe_separator(line).is_some()
386        || try_parse_grid_separator(line).is_some()
387}
388
389/// Find caption after table (if any).
390/// Returns (caption_start, caption_end) positions, or None.
391fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
392    if table_end >= lines.len() {
393        return None;
394    }
395
396    let mut pos = table_end;
397
398    // Skip one blank line if present
399    if pos < lines.len() && lines[pos].trim().is_empty() {
400        pos += 1;
401    }
402
403    if pos >= lines.len() {
404        return None;
405    }
406
407    // Check if this line is a caption
408    if is_table_caption_start(lines[pos]) {
409        let caption_start = pos;
410        // Find end of caption (continues until blank line or fenced-div fence)
411        let mut caption_end = caption_start + 1;
412        while caption_end < lines.len()
413            && !lines[caption_end].trim().is_empty()
414            && !line_is_fenced_div_fence(lines[caption_end])
415        {
416            caption_end += 1;
417        }
418        Some((caption_start, caption_end))
419    } else {
420        None
421    }
422}
423
424/// Emit a table caption node.
425/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
426/// the text ends with a balanced `{...}` block, lift it into a structural
427/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
428/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
429/// the id).
430fn emit_caption_line_text(
431    builder: &mut GreenNodeBuilder<'static>,
432    text_with_newline: &str,
433    config: &ParserOptions,
434    lift_trailing_attrs: bool,
435) {
436    let (text, newline_str) = strip_newline(text_with_newline);
437
438    if lift_trailing_attrs
439        && !text.is_empty()
440        && let Some((_attrs, before_attrs, start_brace_pos)) =
441            try_parse_trailing_attributes_with_pos(text)
442    {
443        let trimmed_len = text.trim_end().len();
444        let space = &text[before_attrs.len()..start_brace_pos];
445        let raw_attrs = &text[start_brace_pos..trimmed_len];
446        let trailing_ws = &text[trimmed_len..];
447
448        if !before_attrs.is_empty() {
449            inline_emission::emit_inlines(builder, before_attrs, config, false);
450        }
451        if !space.is_empty() {
452            builder.token(SyntaxKind::WHITESPACE.into(), space);
453        }
454        emit_attribute_node(builder, raw_attrs);
455        if !trailing_ws.is_empty() {
456            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
457        }
458        if !newline_str.is_empty() {
459            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
460        }
461        return;
462    }
463
464    if !text.is_empty() {
465        inline_emission::emit_inlines(builder, text, config, false);
466    }
467    if !newline_str.is_empty() {
468        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
469    }
470}
471
472fn emit_table_caption(
473    builder: &mut GreenNodeBuilder<'static>,
474    lines: &[&str],
475    start: usize,
476    end: usize,
477    config: &ParserOptions,
478) {
479    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
480
481    let last_idx = (end - start).saturating_sub(1);
482
483    for (i, line) in lines[start..end].iter().enumerate() {
484        let lift_attrs = i == last_idx;
485        if i == 0 {
486            // First line - parse and emit prefix separately
487            let trimmed = line.trim_start();
488            let leading_ws_len = line.len() - trimmed.len();
489
490            // Emit leading whitespace if present
491            if leading_ws_len > 0 {
492                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
493            }
494
495            // Check for caption prefix and emit separately
496            // Calculate where the prefix ends (after trimmed content)
497            let prefix_and_rest = if line.ends_with('\n') {
498                &line[leading_ws_len..line.len() - 1] // Exclude newline
499            } else {
500                &line[leading_ws_len..]
501            };
502
503            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
504                (7, "Table: ")
505            } else if prefix_and_rest.starts_with("table: ") {
506                (7, "table: ")
507            } else if prefix_and_rest.starts_with(": ") {
508                (2, ": ")
509            } else if prefix_and_rest.starts_with(':') {
510                (1, ":")
511            } else {
512                (0, "")
513            };
514
515            if prefix_len > 0 {
516                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
517
518                // Emit rest of line after prefix
519                let rest_start = leading_ws_len + prefix_len;
520                if rest_start < line.len() {
521                    emit_caption_line_text(builder, &line[rest_start..], config, lift_attrs);
522                }
523            } else {
524                // No recognized prefix, emit whole trimmed line
525                emit_caption_line_text(builder, &line[leading_ws_len..], config, lift_attrs);
526            }
527        } else {
528            // Continuation lines - emit with inline parsing (attrs only on last line).
529            emit_caption_line_text(builder, line, config, lift_attrs);
530        }
531    }
532
533    builder.finish_node(); // TABLE_CAPTION
534}
535
/// Emit a table cell with inline content parsing.
/// This is the core helper for Phase 7.1 table inline parsing migration.
///
/// Always opens and closes a `TABLE_CELL` node; `cell_text` is handed to the
/// inline emitter only when it is non-empty, so an empty cell produces a node
/// without children.
fn emit_table_cell(
    builder: &mut GreenNodeBuilder<'static>,
    cell_text: &str,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::TABLE_CELL.into());

    // Parse inline content within the cell
    if !cell_text.is_empty() {
        inline_emission::emit_inlines(builder, cell_text, config, false);
    }

    builder.finish_node(); // TABLE_CELL
}
552
553/// Determine column alignments based on separator and optional header.
554fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
555    for col in columns.iter_mut() {
556        let sep_slice = &separator_line[col.start..col.end];
557
558        if let Some(header) = header_line {
559            let header_start = column_offset_to_byte_index(header, col.start);
560            let header_end = column_offset_to_byte_index(header, col.end);
561
562            // Extract header text for this column
563            let header_text = if header_start < header_end {
564                header[header_start..header_end].trim()
565            } else if header_start < header.len() {
566                header[header_start..].trim()
567            } else {
568                ""
569            };
570
571            if header_text.is_empty() {
572                col.alignment = Alignment::Default;
573                continue;
574            }
575
576            // Find where the header text starts and ends within the column
577            let header_in_col = &header[header_start..header_end];
578            let text_start = header_in_col.len() - header_in_col.trim_start().len();
579            let text_end = header_in_col.trim_end().len() + text_start;
580
581            // Check dash alignment relative to text
582            let dashes_start = 0; // Dashes start at beginning of sep_slice
583            let dashes_end = sep_slice.len();
584
585            let flush_left = dashes_start == text_start;
586            let flush_right = dashes_end == text_end;
587
588            col.alignment = match (flush_left, flush_right) {
589                (true, true) => Alignment::Default,
590                (true, false) => Alignment::Left,
591                (false, true) => Alignment::Right,
592                (false, false) => Alignment::Center,
593            };
594        } else {
595            // Without header, alignment based on first row (we'll handle this later)
596            col.alignment = Alignment::Default;
597        }
598    }
599}
600
601/// Try to parse a simple table starting at the given position.
602/// Returns the number of lines consumed if successful.
603pub(crate) fn try_parse_simple_table(
604    window: &StrippedLines<'_, '_>,
605    builder: &mut GreenNodeBuilder<'static>,
606    config: &ParserOptions,
607) -> Option<usize> {
608    let lines = window.raw();
609    let start_pos = window.pos();
610    log::trace!("try_parse_simple_table at line {}", start_pos + 1);
611
612    if start_pos >= lines.len() {
613        return None;
614    }
615
616    // Detection scans run against the container-prefix-stripped view so a
617    // table nested in `list → blockquote` (e.g. `- >  a   b`) has its `  > `
618    // prefix removed before the separator/column-shape checks. With an empty
619    // prefix `stripped == lines`. Emission re-emits the prefix bytes as
620    // tokens via the window; captions/blank lines still read raw `lines`.
621    let stripped = window.strip_all();
622
623    // Look for a separator line
624    let separator_pos = find_separator_line(&stripped, start_pos)?;
625    log::trace!("  found separator at line {}", separator_pos + 1);
626
627    let separator_line = stripped[separator_pos];
628    let mut columns = try_parse_table_separator(separator_line)?;
629
630    // Determine if there's a header (separator not at start)
631    let has_header = separator_pos > start_pos;
632    let header_line = if has_header {
633        Some(stripped[separator_pos - 1])
634    } else {
635        None
636    };
637
638    // Determine alignments
639    determine_alignments(&mut columns, separator_line, header_line);
640
641    // Find table end (blank line or end of input)
642    let end_pos = find_table_end(&stripped, separator_pos + 1);
643
644    // Must have at least one data row (or it's just a separator)
645    let data_rows = end_pos - separator_pos - 1;
646
647    if data_rows == 0 {
648        return None;
649    }
650
651    // Check for caption before table
652    let caption_before = find_caption_before_table(&stripped, start_pos);
653
654    // Check for caption after table
655    let caption_after = if caption_before.is_some() {
656        None
657    } else {
658        find_caption_after_table(&stripped, end_pos)
659    };
660
661    // Build the table
662    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
663
664    // Emit caption before if present
665    if let Some((cap_start, cap_end)) = caption_before {
666        emit_table_caption(builder, lines, cap_start, cap_end, config);
667
668        // Emit blank line between caption and table if present
669        if cap_end < start_pos {
670            for line in lines.iter().take(start_pos).skip(cap_end) {
671                if line.trim().is_empty() {
672                    builder.start_node(SyntaxKind::BLANK_LINE.into());
673                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
674                    builder.finish_node();
675                }
676            }
677        }
678    }
679
680    // Emit header if present. On the dispatch line the core already emitted
681    // the container prefix; only continuation rows re-emit it (via the window
682    // inside `emit_table_row`).
683    if has_header {
684        emit_table_row(
685            builder,
686            window,
687            separator_pos - 1,
688            &columns,
689            SyntaxKind::TABLE_HEADER,
690            config,
691        );
692    }
693
694    // Emit separator, re-emitting any continuation-line container prefix
695    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
696    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
697    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
698    emit_line_tokens(builder, separator_tail);
699    builder.finish_node();
700
701    // Emit data rows (always continuation lines)
702    for idx in (separator_pos + 1)..end_pos {
703        emit_table_row(
704            builder,
705            window,
706            idx,
707            &columns,
708            SyntaxKind::TABLE_ROW,
709            config,
710        );
711    }
712
713    // Emit caption after if present
714    if let Some((cap_start, cap_end)) = caption_after {
715        // Emit blank line before caption if needed
716        if cap_start > end_pos {
717            for line in lines.iter().take(cap_start).skip(end_pos) {
718                if line.trim().is_empty() {
719                    builder.start_node(SyntaxKind::BLANK_LINE.into());
720                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
721                    builder.finish_node();
722                }
723            }
724        }
725        emit_table_caption(builder, lines, cap_start, cap_end, config);
726    }
727
728    builder.finish_node(); // SimpleTable
729
730    // Calculate lines consumed (including captions)
731    let table_start = if let Some((cap_start, _)) = caption_before {
732        cap_start
733    } else if has_header {
734        separator_pos - 1
735    } else {
736        separator_pos
737    };
738
739    let table_end = if let Some((_, cap_end)) = caption_after {
740        cap_end
741    } else {
742        end_pos
743    };
744
745    let lines_consumed = table_end - table_start;
746
747    Some(lines_consumed)
748}
749
750/// Find the position of a separator line starting from pos.
751fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
752    log::trace!("  find_separator_line from line {}", start_pos + 1);
753
754    // Check first line
755    log::trace!("    checking first line: {:?}", lines[start_pos]);
756    if try_parse_table_separator(lines[start_pos]).is_some() {
757        log::trace!("    separator found at first line");
758        return Some(start_pos);
759    }
760
761    // Check second line (for table with header)
762    if start_pos + 1 < lines.len()
763        && !lines[start_pos].trim().is_empty()
764        && try_parse_table_separator(lines[start_pos + 1]).is_some()
765    {
766        return Some(start_pos + 1);
767    }
768    None
769}
770
771/// Find where the table ends (first blank line or end of input).
772fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
773    for i in start_pos..lines.len() {
774        if lines[i].trim().is_empty() {
775            return i;
776        }
777        // Check if this could be a closing separator
778        if try_parse_table_separator(lines[i]).is_some() {
779            // Check if next line is blank or end
780            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
781                return i + 1;
782            }
783        }
784    }
785    lines.len()
786}
787
788/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
789/// Uses column boundaries from the separator line to extract cells.
790fn emit_table_row(
791    builder: &mut GreenNodeBuilder<'static>,
792    window: &StrippedLines<'_, '_>,
793    abs_idx: usize,
794    columns: &[Column],
795    row_kind: SyntaxKind,
796    config: &ParserOptions,
797) {
798    builder.start_node(row_kind.into());
799
800    // On continuation lines the leading `  > ` prefix is re-emitted as
801    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
802    // stripped tail returned; the dispatch line just strips its (already
803    // core-emitted) prefix. Empty prefix ⇒ the raw line.
804    let line = window.emit_or_dispatch_tail(builder, abs_idx);
805
806    let (line_without_newline, newline_str) = strip_newline(line);
807
808    // Emit leading whitespace if present
809    let trimmed = line_without_newline.trim_start();
810    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
811    if leading_ws_len > 0 {
812        builder.token(
813            SyntaxKind::WHITESPACE.into(),
814            &line_without_newline[..leading_ws_len],
815        );
816    }
817
818    // Track where we are in the line (for losslessness)
819    let mut current_pos = 0;
820
821    // Extract and emit cells based on column boundaries
822    for col in columns.iter() {
823        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
824        let cell_start = if col.start >= leading_ws_len {
825            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
826        } else {
827            0
828        };
829
830        let cell_end = if col.end >= leading_ws_len {
831            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
832        } else {
833            0
834        };
835
836        // Extract cell text from column bounds. When the column lies entirely
837        // before the trimmed content (col.end <= leading_ws_len) both bounds
838        // clamp to 0; treat that as an empty cell rather than re-emitting the
839        // whole row.
840        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
841            &trimmed[cell_start..cell_end]
842        } else {
843            ""
844        };
845
846        let cell_content = cell_text.trim();
847        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
848
849        // Emit any whitespace from current position to start of cell content
850        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
851        if current_pos < content_abs_pos {
852            builder.token(
853                SyntaxKind::WHITESPACE.into(),
854                &trimmed[current_pos..content_abs_pos],
855            );
856        }
857
858        // Emit cell with inline parsing
859        emit_table_cell(builder, cell_content, config);
860
861        // Update current position to end of cell content
862        current_pos = content_abs_pos + cell_content.len();
863    }
864
865    // Emit any remaining whitespace after last cell
866    if current_pos < trimmed.len() {
867        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
868    }
869
870    // Emit newline if present
871    if !newline_str.is_empty() {
872        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
873    }
874
875    builder.finish_node();
876}
877
878// ============================================================================
879// Pipe Table Parsing
880// ============================================================================
881
882/// Check if a line is a pipe table separator line.
883/// Returns the column alignments if it's a valid separator.
884fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
885    let trimmed = line.trim();
886
887    // Must contain at least one pipe
888    if !trimmed.contains('|') && !trimmed.contains('+') {
889        return None;
890    }
891
892    // Split by pipes (or + for orgtbl variant)
893    let cells: Vec<&str> = if trimmed.contains('+') {
894        // Orgtbl variant: use + as separator in separator line
895        trimmed.split(['|', '+']).collect()
896    } else {
897        trimmed.split('|').collect()
898    };
899
900    let mut alignments = Vec::new();
901
902    for cell in cells {
903        let cell = cell.trim();
904
905        // Skip empty cells (from leading/trailing pipes)
906        if cell.is_empty() {
907            continue;
908        }
909
910        // Must be dashes with optional colons
911        let starts_colon = cell.starts_with(':');
912        let ends_colon = cell.ends_with(':');
913
914        // Remove colons to check if rest is all dashes
915        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
916
917        // Must have at least one dash
918        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
919            return None;
920        }
921
922        // Determine alignment from colon positions
923        let alignment = match (starts_colon, ends_colon) {
924            (true, true) => Alignment::Center,
925            (true, false) => Alignment::Left,
926            (false, true) => Alignment::Right,
927            (false, false) => Alignment::Default,
928        };
929
930        alignments.push(alignment);
931    }
932
933    // Must have at least one column
934    if alignments.is_empty() {
935        None
936    } else {
937        Some(alignments)
938    }
939}
940
/// Split a pipe table row into trimmed cell strings.
///
/// A backslash escapes whatever character follows it, so `\|` stays inside
/// the current cell while `\\|` is an escaped backslash followed by a real
/// cell delimiter. This is the same escape rule `emit_pipe_table_row` applies
/// when it scans for pipes, so both functions agree on cell boundaries.
/// Escape sequences are kept verbatim in the returned text.
///
/// A single leading pipe is treated as the row border and skipped. A
/// trailing empty cell (the result of a trailing pipe) is dropped.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // The outer leading pipe is a border, not a delimiter between cells.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and consume the escaped character with
                // it, so the escaped character can never act as a delimiter
                // or start another escape.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                // End current cell, start new one
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // Add last cell if it's not empty (it would be empty if line ended with pipe)
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
988
989/// Emit a pipe table row with inline-parsed cells.
990/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
991fn emit_pipe_table_row(
992    builder: &mut GreenNodeBuilder<'static>,
993    window: &StrippedLines<'_, '_>,
994    abs_idx: usize,
995    row_kind: SyntaxKind,
996    config: &ParserOptions,
997) {
998    builder.start_node(row_kind.into());
999
1000    // On continuation lines (separator/data rows under a list+blockquote
1001    // container) the leading `  > ` prefix is not consumed by the core;
1002    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1003    // and returns the stripped tail. On the dispatch line the core already
1004    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1005    // With an empty prefix (non-nested tables) both are no-ops returning
1006    // the raw line.
1007    let line = if abs_idx == window.dispatch_pos() {
1008        window.dispatch_tail()
1009    } else {
1010        window.emit_prefix_at(builder, abs_idx)
1011    };
1012
1013    let (line_without_newline, newline_str) = strip_newline(line);
1014    let trimmed = line_without_newline.trim();
1015
1016    // Parse cell boundaries
1017    let mut cell_starts = Vec::new();
1018    let mut cell_ends = Vec::new();
1019    let mut in_escape = false;
1020
1021    // Find all pipe positions (excluding escaped ones)
1022    let mut pipe_positions = Vec::new();
1023    for (i, ch) in trimmed.char_indices() {
1024        if in_escape {
1025            in_escape = false;
1026            continue;
1027        }
1028        if ch == '\\' {
1029            in_escape = true;
1030            continue;
1031        }
1032        if ch == '|' {
1033            pipe_positions.push(i);
1034        }
1035    }
1036
1037    // Determine cell boundaries based on pipe positions
1038    if pipe_positions.is_empty() {
1039        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1040        cell_starts.push(0);
1041        cell_ends.push(trimmed.len());
1042    } else {
1043        // Check if line starts with pipe
1044        let start_pipe = pipe_positions.first() == Some(&0);
1045        // Check if line ends with pipe
1046        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1047
1048        if start_pipe {
1049            // Skip first pipe
1050            for i in 1..pipe_positions.len() {
1051                cell_starts.push(pipe_positions[i - 1] + 1);
1052                cell_ends.push(pipe_positions[i]);
1053            }
1054            // Add last cell if there's no trailing pipe
1055            if !end_pipe {
1056                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1057                cell_ends.push(trimmed.len());
1058            }
1059        } else {
1060            // No leading pipe
1061            cell_starts.push(0);
1062            cell_ends.push(pipe_positions[0]);
1063
1064            for i in 1..pipe_positions.len() {
1065                cell_starts.push(pipe_positions[i - 1] + 1);
1066                cell_ends.push(pipe_positions[i]);
1067            }
1068
1069            // Add last cell if there's no trailing pipe
1070            if !end_pipe {
1071                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1072                cell_ends.push(trimmed.len());
1073            }
1074        }
1075    }
1076
1077    // Emit leading whitespace if present (before trim)
1078    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1079    if leading_ws_len > 0 {
1080        builder.token(
1081            SyntaxKind::WHITESPACE.into(),
1082            &line_without_newline[..leading_ws_len],
1083        );
1084    }
1085
1086    // Emit cells with pipes
1087    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1088        // Emit pipe before cell (except for first cell if no leading pipe)
1089        if *start > 0 {
1090            builder.token(SyntaxKind::TEXT.into(), "|");
1091        } else if idx == 0 && trimmed.starts_with('|') {
1092            // Leading pipe
1093            builder.token(SyntaxKind::TEXT.into(), "|");
1094        }
1095
1096        // Get cell content with its whitespace
1097        let cell_with_ws = &trimmed[*start..*end];
1098        let cell_content = cell_with_ws.trim();
1099
1100        // Emit leading whitespace within cell
1101        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1102        if !cell_leading_ws.is_empty() {
1103            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1104        }
1105
1106        // Emit cell with inline parsing
1107        emit_table_cell(builder, cell_content, config);
1108
1109        // Emit trailing whitespace within cell
1110        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1111        if cell_trailing_ws_start < cell_with_ws.len() {
1112            builder.token(
1113                SyntaxKind::WHITESPACE.into(),
1114                &cell_with_ws[cell_trailing_ws_start..],
1115            );
1116        }
1117    }
1118
1119    // Emit trailing pipe if present
1120    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1121        builder.token(SyntaxKind::TEXT.into(), "|");
1122    }
1123
1124    // Emit trailing whitespace after trim (before newline)
1125    let trailing_ws_start = leading_ws_len + trimmed.len();
1126    if trailing_ws_start < line_without_newline.len() {
1127        builder.token(
1128            SyntaxKind::WHITESPACE.into(),
1129            &line_without_newline[trailing_ws_start..],
1130        );
1131    }
1132
1133    // Emit newline
1134    if !newline_str.is_empty() {
1135        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1136    }
1137
1138    builder.finish_node();
1139}
1140
1141/// Try to parse a pipe table starting at the given position.
1142/// Returns the number of lines consumed if successful.
1143pub(crate) fn try_parse_pipe_table(
1144    window: &StrippedLines<'_, '_>,
1145    builder: &mut GreenNodeBuilder<'static>,
1146    config: &ParserOptions,
1147) -> Option<usize> {
1148    let lines = window.raw();
1149    let start_pos = window.pos();
1150    if start_pos + 1 >= lines.len() {
1151        return None;
1152    }
1153
1154    // Detection scans run against a container-prefix-stripped view, so a
1155    // table nested in `list → blockquote` (e.g. `- > | a | b |`) has its
1156    // `  > ` prefix removed before the separator/cell shape checks. Each
1157    // entry is a no-alloc tail slice of the matching raw line; with an
1158    // empty prefix `stripped == lines`. The dispatch line uses the
1159    // emission-safe line-0 strip (its prefix was consumed by the core);
1160    // every other line gets the full continuation strip. Emission still
1161    // reads raw `lines` so the prefix bytes can be re-emitted as tokens.
1162    let stripped = window.strip_all();
1163
1164    // Check if this line is a caption followed by a table
1165    // If so, the actual table starts after the caption and blank line
1166    let (actual_start, caption_before) = if is_caption_followed_by_table(&stripped, start_pos) {
1167        let (cap_start, cap_end) = caption_range_starting_at(&stripped, start_pos)?;
1168        let mut pos = cap_end;
1169        while pos < stripped.len() && stripped[pos].trim().is_empty() {
1170            pos += 1;
1171        }
1172        (pos, Some((cap_start, cap_end)))
1173    } else {
1174        (start_pos, None)
1175    };
1176
1177    if actual_start + 1 >= lines.len() {
1178        return None;
1179    }
1180
1181    // First line should have pipes (potential header)
1182    if !stripped[actual_start].contains('|') {
1183        return None;
1184    }
1185
1186    // Second line should be separator
1187    let alignments = try_parse_pipe_separator(stripped[actual_start + 1])?;
1188
1189    // Parse header cells
1190    let header_cells = parse_pipe_table_row(stripped[actual_start]);
1191
1192    // Number of columns should match (approximately - be lenient)
1193    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1194        // Only fail if very different
1195        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1196            return None;
1197        }
1198    }
1199
1200    // Find table end (first blank line or end of input)
1201    let mut end_pos = actual_start + 2;
1202    while end_pos < stripped.len() {
1203        let line = stripped[end_pos];
1204        if line.trim().is_empty() {
1205            break;
1206        }
1207        // Row should have pipes
1208        if !line.contains('|') {
1209            break;
1210        }
1211        end_pos += 1;
1212    }
1213
1214    // Must have at least one data row
1215    if end_pos <= actual_start + 2 {
1216        return None;
1217    }
1218
1219    // Check for caption before table (only if we didn't already detect it)
1220    let caption_before =
1221        caption_before.or_else(|| find_caption_before_table(&stripped, actual_start));
1222
1223    // Check for caption after table
1224    let caption_after = if caption_before.is_some() {
1225        None
1226    } else {
1227        find_caption_after_table(&stripped, end_pos)
1228    };
1229
1230    // Build the pipe table
1231    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1232
1233    // Emit caption before if present
1234    if let Some((cap_start, cap_end)) = caption_before {
1235        emit_table_caption(builder, lines, cap_start, cap_end, config);
1236        // Emit blank line between caption and table if present
1237        if cap_end < actual_start {
1238            for line in lines.iter().take(actual_start).skip(cap_end) {
1239                if line.trim().is_empty() {
1240                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1241                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1242                    builder.finish_node();
1243                }
1244            }
1245        }
1246    }
1247
1248    // Emit header row with inline-parsed cells. On the dispatch line the
1249    // core already emitted the container prefix; only when the header is a
1250    // continuation line (e.g. it follows a caption-before line) do we emit
1251    // the prefix here.
1252    emit_pipe_table_row(
1253        builder,
1254        window,
1255        actual_start,
1256        SyntaxKind::TABLE_HEADER,
1257        config,
1258    );
1259
1260    // Emit separator, re-emitting any continuation-line container prefix
1261    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1262    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1263    let sep_idx = actual_start + 1;
1264    let separator_tail = if sep_idx == window.dispatch_pos() {
1265        window.dispatch_tail()
1266    } else {
1267        window.emit_prefix_at(builder, sep_idx)
1268    };
1269    emit_line_tokens(builder, separator_tail);
1270    builder.finish_node();
1271
1272    // Emit data rows with inline-parsed cells (always continuation lines)
1273    for idx in (actual_start + 2)..end_pos {
1274        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1275    }
1276
1277    // Emit caption after if present
1278    if let Some((cap_start, cap_end)) = caption_after {
1279        // Emit blank line before caption if needed
1280        if cap_start > end_pos {
1281            for line in lines.iter().take(cap_start).skip(end_pos) {
1282                if line.trim().is_empty() {
1283                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1284                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1285                    builder.finish_node();
1286                }
1287            }
1288        }
1289        emit_table_caption(builder, lines, cap_start, cap_end, config);
1290    }
1291
1292    builder.finish_node(); // PipeTable
1293
1294    // Calculate lines consumed
1295    let table_start = caption_before
1296        .map(|(start, _)| start)
1297        .unwrap_or(actual_start);
1298    let table_end = if let Some((_, cap_end)) = caption_after {
1299        cap_end
1300    } else {
1301        end_pos
1302    };
1303
1304    Some(table_end - table_start)
1305}
1306
#[cfg(test)]
mod tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Run the simple-table parser over `input`, dispatching at line `pos`
    /// with a default container prefix, and return the lines consumed.
    fn simple_table_lines_consumed(input: Vec<&str>, pos: usize) -> Option<usize> {
        let options = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(&input, pos, &prefix);
        try_parse_simple_table(&window, &mut builder, &options)
    }

    /// Run the pipe-table parser over `input`, dispatching at line `pos`
    /// with a default container prefix, and return the lines consumed.
    fn pipe_table_lines_consumed(input: Vec<&str>, pos: usize) -> Option<usize> {
        let options = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(&input, pos, &prefix);
        try_parse_pipe_table(&window, &mut builder, &options)
    }

    #[test]
    fn test_separator_detection() {
        for line in [
            "------- ------ ----------   -------",
            "  ---  ---  ---",
            "--- --- ---", // table separator
        ] {
            assert!(
                try_parse_table_separator(line).is_some(),
                "expected a table separator: {line:?}"
            );
        }
        // A single dash run is a horizontal rule, not a separator.
        assert!(try_parse_table_separator("-------").is_none());
    }

    #[test]
    fn test_column_extraction() {
        let columns = extract_columns("-------     ------ ----------   -------", 0);
        assert_eq!(columns.len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let consumed = simple_table_lines_consumed(
            vec![
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ],
            0,
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let consumed = simple_table_lines_consumed(
            vec![
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ],
            0,
        );

        // sep + 2 rows
        assert_eq!(consumed, Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        for caption in ["Table: My caption", "table: My caption", ": My caption"] {
            assert!(
                try_parse_caption_prefix(caption).is_some(),
                "expected a caption prefix: {caption:?}"
            );
        }
        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
        assert!(try_parse_caption_prefix("Not a caption").is_none());
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let tree = crate::parse("Term\n: ```\n  code\n  ```\n", None);
        let contains =
            |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            contains(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            contains(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !contains(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
        let tree = crate::parse(input, None);

        let caption_count = tree
            .descendants()
            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
            .count();
        assert_eq!(
            caption_count, 2,
            "expected both captions to attach to tables"
        );

        let has_definition_list = tree
            .descendants()
            .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST);
        assert!(
            !has_definition_list,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let consumed = simple_table_lines_consumed(
            vec![
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
                "Table: Demonstration of simple table syntax.",
                "",
            ],
            0,
        );

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        // Dispatch happens on the header line (index 2), after the caption.
        let consumed = simple_table_lines_consumed(
            vec![
                "Table: Demonstration of simple table syntax.",
                "",
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ],
            2,
        );

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let consumed = simple_table_lines_consumed(
            vec![
                "  Right     Left",
                "-------     ------",
                "     12     12",
                "",
                ": Short caption",
                "",
            ],
            0,
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let consumed = simple_table_lines_consumed(
            vec![
                "  Right     Left",
                "-------     ------",
                "     12     12",
                "",
                "Table: This is a longer caption",
                "that spans multiple lines.",
                "",
            ],
            0,
        );

        // Should consume through end of multi-line caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let consumed = simple_table_lines_consumed(
            vec![
                "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
                "--------------  ------------ ------- ---------------- ----------------- ------------",
                "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
                "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
                "",
            ],
            0,
        );

        assert_eq!(consumed, Some(4));
    }

    // Pipe table tests
    #[test]
    fn test_pipe_separator_detection() {
        for line in [
            "|------:|:-----|---------|:------:|",
            "|---|---|",
            "-----|-----:",     // No leading pipe
            "|-----+-------|", // Orgtbl variant
        ] {
            assert!(
                try_parse_pipe_separator(line).is_some(),
                "expected a pipe separator: {line:?}"
            );
        }
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(
            aligns,
            vec![
                Alignment::Right,
                Alignment::Left,
                Alignment::Default,
                Alignment::Center,
            ]
        );
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let cells = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(cells, vec!["Right", "Left", "Center"]);

        // Without leading/trailing pipes
        let cells2 = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(cells2.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let consumed = pipe_table_lines_consumed(
            vec![
                "",
                "| Right | Left | Center |",
                "|------:|:-----|:------:|",
                "|   12  |  12  |   12   |",
                "|  123  |  123 |  123   |",
                "",
            ],
            1,
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let consumed = pipe_table_lines_consumed(
            vec![
                "",
                "fruit| price",
                "-----|-----:",
                "apple|2.05",
                "pear|1.37",
                "",
            ],
            1,
        );

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let consumed = pipe_table_lines_consumed(
            vec![
                "",
                "| Col1 | Col2 |",
                "|------|------|",
                "| A    | B    |",
                "",
                "Table: My pipe table",
                "",
            ],
            1,
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let consumed = pipe_table_lines_consumed(
            vec![
                ": (#tab:base) base R quoting",
                "functions",
                "",
                "| C | D |",
                "|---|---|",
                "| 3 | 4 |",
                "",
            ],
            0,
        );

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(consumed, Some(6));
    }
}
1638
1639// ============================================================================
1640// Grid Table Parsing
1641// ============================================================================
1642
1643/// Check if a line is a grid table row separator (starts with +, contains -, ends with +).
1644/// Returns Some(vec of column info) if valid, None otherwise.
1645fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
1646    let trimmed = line.trim_start();
1647    let leading_spaces = line.len() - trimmed.len();
1648
1649    // A grid border must begin at column 0 of its container content. Detection
1650    // runs on the container-prefix-stripped line (see `try_parse_grid_table`),
1651    // so any remaining leading whitespace means the border is indented relative
1652    // to its container -- pandoc parses that as a paragraph, not a grid table.
1653    if leading_spaces > 0 {
1654        return None;
1655    }
1656
1657    // Must start with + and end with +
1658    if !trimmed.starts_with('+') || !trimmed.trim_end().ends_with('+') {
1659        return None;
1660    }
1661
1662    // Split by + to get column segments
1663    let trimmed = trimmed.trim_end();
1664    let segments: Vec<&str> = trimmed.split('+').collect();
1665
1666    // Need at least 3 parts: empty before first +, column(s), empty after last +
1667    if segments.len() < 3 {
1668        return None;
1669    }
1670
1671    let mut columns = Vec::new();
1672
1673    // Parse each segment between + signs
1674    for segment in segments.iter().skip(1).take(segments.len() - 2) {
1675        if segment.is_empty() {
1676            continue;
1677        }
1678
1679        // Segment must be dashes/equals with optional colons for alignment
1680        let seg_trimmed = *segment;
1681
1682        // Get the fill character (after removing colons)
1683        let inner = seg_trimmed.trim_start_matches(':').trim_end_matches(':');
1684
1685        // Must be all dashes or all equals
1686        if inner.is_empty() {
1687            return None;
1688        }
1689
1690        let first_char = inner.chars().next().unwrap();
1691        if first_char != '-' && first_char != '=' {
1692            return None;
1693        }
1694
1695        if !inner.chars().all(|c| c == first_char) {
1696            return None;
1697        }
1698
1699        let is_header_sep = first_char == '=';
1700
1701        columns.push(GridColumn {
1702            is_header_separator: is_header_sep,
1703            width: seg_trimmed.chars().count(),
1704        });
1705    }
1706
1707    if columns.is_empty() {
1708        None
1709    } else {
1710        Some(columns)
1711    }
1712}
1713
/// Per-column data recorded from one grid table border line.
#[derive(Clone, Debug)]
struct GridColumn {
    /// `true` when the border segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the border segment between two `+` signs,
    /// alignment colons included.
    width: usize,
}
1720
1721fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1722    let mut end_byte = start_byte;
1723    let mut display_cols = 0usize;
1724
1725    for (offset, ch) in line[start_byte..].char_indices() {
1726        if ch == '|' {
1727            let sep_byte = start_byte + offset;
1728            return (sep_byte, sep_byte + 1);
1729        }
1730        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1731        if display_cols + ch_width > width {
1732            break;
1733        }
1734        display_cols += ch_width;
1735        end_byte = start_byte + offset + ch.len_utf8();
1736        if display_cols >= width {
1737            break;
1738        }
1739    }
1740
1741    // If the width budget is exhausted before seeing a separator (for example
1742    // because of padding/layout drift), advance to the next literal separator
1743    // to keep row slicing aligned and preserve losslessness.
1744    let mut sep_byte = end_byte;
1745    while sep_byte < line.len() {
1746        let mut chars = line[sep_byte..].chars();
1747        let Some(ch) = chars.next() else {
1748            break;
1749        };
1750        if ch == '|' {
1751            return (sep_byte, sep_byte + 1);
1752        }
1753        sep_byte += ch.len_utf8();
1754    }
1755
1756    (end_byte, end_byte)
1757}
1758
/// Decide whether `line` looks like a grid table content row.
///
/// A content row is indented by at most three bytes of whitespace, begins with
/// `|`, and (ignoring trailing whitespace) ends with either `|` (ordinary row)
/// or `+` (spanning-style continuation line).
fn is_grid_content_row(line: &str) -> bool {
    let body = line.trim_start();

    // Four or more bytes of indentation are not accepted.
    if line.len() - body.len() > 3 {
        return false;
    }

    let row = body.trim_end();
    row.starts_with('|') && matches!(row.chars().next_back(), Some('|' | '+'))
}
1772
1773/// Extract cell contents from a single grid table row line.
1774/// Returns a vector of cell contents (trimmed) based on column boundaries.
1775/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1776fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1777    let (line_content, _) = strip_newline(line);
1778    let line_trimmed = line_content.trim();
1779
1780    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1781        return vec![String::new(); _columns.len()];
1782    }
1783
1784    let mut cells = Vec::with_capacity(_columns.len());
1785    let mut pos_byte = 1; // Skip leading pipe
1786
1787    for col in _columns {
1788        let col_idx = cells.len();
1789        if pos_byte >= line_trimmed.len() {
1790            cells.push(String::new());
1791            continue;
1792        }
1793
1794        let start_byte = pos_byte;
1795        let end_byte = if col_idx + 1 == _columns.len() {
1796            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1797        } else {
1798            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1799            pos_byte = next_start;
1800            end
1801        };
1802        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1803        if col_idx + 1 == _columns.len() {
1804            pos_byte = line_trimmed.len();
1805        }
1806    }
1807
1808    cells
1809}
1810
1811/// Emit a grid table row with inline-parsed cells.
1812/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1813/// then continuation lines as raw TEXT for losslessness.
1814fn emit_grid_table_row(
1815    builder: &mut GreenNodeBuilder<'static>,
1816    window: &StrippedLines<'_, '_>,
1817    indices: &[usize],
1818    columns: &[GridColumn],
1819    row_kind: SyntaxKind,
1820    config: &ParserOptions,
1821) {
1822    if indices.is_empty() {
1823        return;
1824    }
1825
1826    builder.start_node(row_kind.into());
1827
1828    // Emit first line with TABLE_CELL nodes. The continuation-line container
1829    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1830    // inside the row node before the cell text; the returned tail is the
1831    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
1832    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1833    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
1834    let cell_contents = extract_grid_cells_from_line(first_line, columns);
1835    let (line_without_newline, newline_str) = strip_newline(first_line);
1836    let trimmed = line_without_newline.trim();
1837    let expected_pipe_count = columns.len().saturating_add(1);
1838    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1839
1840    // Rows that don't contain all expected column separators (spanning-style rows)
1841    // must be emitted verbatim for losslessness. The first line's prefix was
1842    // already consumed above; emit its tail and each continuation tail.
1843    if actual_pipe_count != expected_pipe_count {
1844        emit_line_tokens(builder, first_line);
1845        for &idx in &indices[1..] {
1846            let tail = window.emit_or_dispatch_tail(builder, idx);
1847            emit_line_tokens(builder, tail);
1848        }
1849        builder.finish_node();
1850        return;
1851    }
1852
1853    // Emit leading whitespace
1854    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1855    if leading_ws_len > 0 {
1856        builder.token(
1857            SyntaxKind::WHITESPACE.into(),
1858            &line_without_newline[..leading_ws_len],
1859        );
1860    }
1861
1862    // Emit leading pipe
1863    if trimmed.starts_with('|') {
1864        builder.token(SyntaxKind::TEXT.into(), "|");
1865    }
1866
1867    // Emit each cell based on fixed column widths from separators
1868    let mut pos_byte = 1usize; // after leading pipe
1869    for (idx, cell_content) in cell_contents.iter().enumerate() {
1870        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1871            let start_byte = pos_byte;
1872            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1873                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1874            } else {
1875                let (end, next_start) =
1876                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1877                pos_byte = next_start;
1878                end
1879            };
1880            let slice = &trimmed[start_byte..end_byte];
1881            if idx + 1 == columns.len() {
1882                pos_byte = trimmed.len();
1883            }
1884            slice
1885        } else {
1886            ""
1887        };
1888
1889        // Emit leading whitespace in cell
1890        let cell_trimmed = part.trim();
1891        let ws_start_len = part.len() - part.trim_start().len();
1892        if ws_start_len > 0 {
1893            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1894        }
1895
1896        // Emit TABLE_CELL with inline parsing
1897        emit_table_cell(builder, cell_content, config);
1898
1899        // Emit trailing whitespace in cell
1900        let ws_end_start = ws_start_len + cell_trimmed.len();
1901        if ws_end_start < part.len() {
1902            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1903        }
1904
1905        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1906        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1907            builder.token(SyntaxKind::TEXT.into(), "|");
1908        }
1909    }
1910
1911    // Emit trailing whitespace before newline
1912    let trailing_ws_start = leading_ws_len + trimmed.len();
1913    if trailing_ws_start < line_without_newline.len() {
1914        builder.token(
1915            SyntaxKind::WHITESPACE.into(),
1916            &line_without_newline[trailing_ws_start..],
1917        );
1918    }
1919
1920    // Emit newline
1921    if !newline_str.is_empty() {
1922        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1923    }
1924
1925    // Emit continuation lines as TEXT for losslessness, re-emitting each
1926    // line's container prefix first.
1927    for &idx in &indices[1..] {
1928        let tail = window.emit_or_dispatch_tail(builder, idx);
1929        emit_line_tokens(builder, tail);
1930    }
1931
1932    builder.finish_node();
1933}
1934
1935/// Try to parse a grid table starting at the given position.
1936/// Returns the number of lines consumed if successful.
1937pub(crate) fn try_parse_grid_table(
1938    window: &StrippedLines<'_, '_>,
1939    builder: &mut GreenNodeBuilder<'static>,
1940    config: &ParserOptions,
1941) -> Option<usize> {
1942    let lines = window.raw();
1943    let start_pos = window.pos();
1944    if start_pos >= lines.len() {
1945        return None;
1946    }
1947
1948    // Detection scans run against the container-prefix-stripped view so a
1949    // grid table nested in `list → blockquote` (e.g. `- > +---+---+`) has its
1950    // `  > ` prefix removed before the separator/content-row shape checks.
1951    // With an empty prefix `stripped == lines`. Emission re-emits the prefix
1952    // bytes as tokens via the window; captions/blank lines read raw `lines`.
1953    let stripped = window.strip_all();
1954
1955    // Check if this line is a caption followed by a table
1956    // If so, the actual table starts after the caption and blank line
1957    let (actual_start, caption_before) = if is_caption_followed_by_table(&stripped, start_pos) {
1958        let (cap_start, cap_end) = caption_range_starting_at(&stripped, start_pos)?;
1959        let mut pos = cap_end;
1960        while pos < stripped.len() && stripped[pos].trim().is_empty() {
1961            pos += 1;
1962        }
1963        (pos, Some((cap_start, cap_end)))
1964    } else {
1965        (start_pos, None)
1966    };
1967
1968    if actual_start >= lines.len() {
1969        return None;
1970    }
1971
1972    // First line must be a grid separator
1973    let first_line = stripped[actual_start];
1974    let _columns = try_parse_grid_separator(first_line)?;
1975
1976    // Track table structure
1977    let mut end_pos = actual_start + 1;
1978    let mut found_header_sep = false;
1979    let mut in_footer = false;
1980
1981    // Scan table lines
1982    while end_pos < lines.len() {
1983        let line = stripped[end_pos];
1984
1985        // Check for blank line (table ends)
1986        if line.trim().is_empty() {
1987            break;
1988        }
1989
1990        // Check for separator line
1991        if let Some(sep_cols) = try_parse_grid_separator(line) {
1992            // Check if this is a header separator (=)
1993            if sep_cols.iter().any(|c| c.is_header_separator) {
1994                if !found_header_sep {
1995                    found_header_sep = true;
1996                } else if !in_footer {
1997                    // Second = separator starts footer
1998                    in_footer = true;
1999                }
2000            }
2001            end_pos += 1;
2002            continue;
2003        }
2004
2005        // Check for content row
2006        if is_grid_content_row(line) {
2007            end_pos += 1;
2008            continue;
2009        }
2010
2011        // Not a valid grid table line - table ends
2012        break;
2013    }
2014
2015    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2016    // Or just top + content rows that end with a separator
2017    if end_pos <= actual_start + 1 {
2018        return None;
2019    }
2020
2021    // Last consumed line should be a separator for a well-formed table
2022    // But we'll be lenient and accept tables ending with content rows
2023
2024    // Check for caption before table (only if we didn't already detected it)
2025    let caption_before =
2026        caption_before.or_else(|| find_caption_before_table(&stripped, actual_start));
2027
2028    // Check for caption after table
2029    let caption_after = if caption_before.is_some() {
2030        None
2031    } else {
2032        find_caption_after_table(&stripped, end_pos)
2033    };
2034
2035    // Build the grid table
2036    builder.start_node(SyntaxKind::GRID_TABLE.into());
2037
2038    // Emit caption before if present
2039    if let Some((cap_start, cap_end)) = caption_before {
2040        emit_table_caption(builder, lines, cap_start, cap_end, config);
2041        // Emit blank line between caption and table if present
2042        if cap_end < actual_start {
2043            for line in lines.iter().take(actual_start).skip(cap_end) {
2044                if line.trim().is_empty() {
2045                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2046                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2047                    builder.finish_node();
2048                }
2049            }
2050        }
2051    }
2052
2053    // Track whether we've passed the header separator
2054    let mut past_header_sep = false;
2055    let mut in_footer_section = false;
2056    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2057    // each line's container prefix can be re-emitted via the window.
2058    let mut current_row_indices: Vec<usize> = Vec::new();
2059    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2060
2061    // Emit table rows - accumulate multi-line cells
2062    for (idx, &line) in stripped.iter().enumerate().take(end_pos).skip(actual_start) {
2063        if let Some(sep_cols) = try_parse_grid_separator(line) {
2064            // Separator line - emit any accumulated row first
2065            if !current_row_indices.is_empty() {
2066                emit_grid_table_row(
2067                    builder,
2068                    window,
2069                    &current_row_indices,
2070                    &sep_cols,
2071                    current_row_kind,
2072                    config,
2073                );
2074                current_row_indices.clear();
2075            }
2076
2077            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2078
2079            // Re-emit any continuation-line container prefix (`  > `) as
2080            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2081            if is_header_sep {
2082                if !past_header_sep {
2083                    // This is the header/body separator
2084                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2085                    let tail = window.emit_or_dispatch_tail(builder, idx);
2086                    emit_line_tokens(builder, tail);
2087                    builder.finish_node();
2088                    past_header_sep = true;
2089                } else {
2090                    // Footer separator
2091                    if !in_footer_section {
2092                        in_footer_section = true;
2093                    }
2094                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2095                    let tail = window.emit_or_dispatch_tail(builder, idx);
2096                    emit_line_tokens(builder, tail);
2097                    builder.finish_node();
2098                }
2099            } else {
2100                // Regular separator (row boundary)
2101                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2102                let tail = window.emit_or_dispatch_tail(builder, idx);
2103                emit_line_tokens(builder, tail);
2104                builder.finish_node();
2105            }
2106        } else if is_grid_content_row(line) {
2107            // Content row - accumulate for multi-line cells
2108            current_row_kind = if !past_header_sep && found_header_sep {
2109                SyntaxKind::TABLE_HEADER
2110            } else if in_footer_section {
2111                SyntaxKind::TABLE_FOOTER
2112            } else {
2113                SyntaxKind::TABLE_ROW
2114            };
2115
2116            current_row_indices.push(idx);
2117        }
2118    }
2119
2120    // Emit any remaining accumulated row
2121    if !current_row_indices.is_empty() {
2122        // Use first separator's columns for cell boundaries
2123        if let Some(sep_cols) = try_parse_grid_separator(stripped[actual_start]) {
2124            emit_grid_table_row(
2125                builder,
2126                window,
2127                &current_row_indices,
2128                &sep_cols,
2129                current_row_kind,
2130                config,
2131            );
2132        }
2133    }
2134
2135    // Emit caption after if present
2136    if let Some((cap_start, cap_end)) = caption_after {
2137        if cap_start > end_pos {
2138            for line in lines.iter().take(cap_start).skip(end_pos) {
2139                if line.trim().is_empty() {
2140                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2141                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2142                    builder.finish_node();
2143                }
2144            }
2145        }
2146        emit_table_caption(builder, lines, cap_start, cap_end, config);
2147    }
2148
2149    builder.finish_node(); // GRID_TABLE
2150
2151    // Calculate lines consumed
2152    let table_start = caption_before
2153        .map(|(start, _)| start)
2154        .unwrap_or(actual_start);
2155    let table_end = if let Some((_, cap_end)) = caption_after {
2156        cap_end
2157    } else {
2158        end_pos
2159    };
2160
2161    Some(table_end - table_start)
2162}
2163
#[cfg(test)]
mod grid_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Run `try_parse_grid_table` over `input` with the window positioned at
    /// line `pos` and an empty container prefix, returning the number of
    /// consumed lines.
    fn consumed_lines(input: Vec<&str>, pos: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(&input, pos, &prefix);
        try_parse_grid_table(&window, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let separators = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for line in separators {
            assert!(try_parse_grid_separator(line).is_some(), "{line:?}");
        }

        let non_separators = [
            "not a separator",
            "|---|---|", // pipe table sep
            // A grid border must sit at column 0 of its container content; an
            // indented border is not a grid table (matches pandoc, which
            // parses an indented `+---+` as a paragraph). Detection runs on
            // the container-stripped line, so any remaining leading space
            // disqualifies.
            " +---+---+",
            "  +---+---+",
            "   +===+===+",
        ];
        for line in non_separators {
            assert!(try_parse_grid_separator(line).is_none(), "{line:?}");
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header.iter().all(|c| c.is_header_separator));

        let plain = try_parse_grid_separator("+---+---+").unwrap();
        assert!(plain.iter().all(|c| !c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for row in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(row), "{row:?}");
        }
        // A separator is not content, and neither is pipe-less text.
        for other in ["+---+---+", "no pipes here"] {
            assert!(!is_grid_content_row(other), "{other:?}");
        }
    }

    #[test]
    fn test_basic_grid_table() {
        let input = vec![
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = vec![
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = vec![
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = vec![
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = vec![
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts on the first border line; the count still includes
        // caption + blank + table.
        assert_eq!(consumed_lines(input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = vec![
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(input, 0), Some(7));
    }
}
2339
2340// ============================================================================
2341// Multiline Table Parsing
2342// ============================================================================
2343
2344/// Check if a line is a multiline table separator (continuous dashes).
2345/// Multiline table separators span the full width and are all dashes.
2346/// Returns Some(columns) if valid, None otherwise.
2347fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2348    let trimmed = line.trim_start();
2349    let leading_spaces = line.len() - trimmed.len();
2350
2351    // Must have leading spaces <= 3 to not be a code block
2352    if leading_spaces > 3 {
2353        return None;
2354    }
2355
2356    let trimmed = trimmed.trim_end();
2357
2358    // Must be all dashes (continuous line of dashes)
2359    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2360        return None;
2361    }
2362
2363    // Must have at least 3 dashes
2364    if trimmed.len() < 3 {
2365        return None;
2366    }
2367
2368    // This is a full-width separator - columns will be determined by column separator lines
2369    Some(vec![Column {
2370        start: leading_spaces,
2371        end: leading_spaces + trimmed.len(),
2372        alignment: Alignment::Default,
2373    }])
2374}
2375
2376/// Check if a line is a column separator line for multiline tables.
2377/// Column separators have dashes with spaces between them to define columns.
2378fn is_column_separator(line: &str) -> bool {
2379    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2380}
2381
2382fn is_headerless_single_row_without_blank(
2383    lines: &[&str],
2384    row_start: usize,
2385    row_end: usize,
2386    columns: &[Column],
2387) -> bool {
2388    if row_start >= row_end {
2389        return false;
2390    }
2391
2392    if row_end - row_start == 1 {
2393        return false;
2394    }
2395
2396    let Some(last_col) = columns.last() else {
2397        return false;
2398    };
2399
2400    for line in lines.iter().take(row_end).skip(row_start + 1) {
2401        let (content, _) = strip_newline(line);
2402        let prefix_end = last_col.start.min(content.len());
2403        if !content[..prefix_end].trim().is_empty() {
2404            return false;
2405        }
2406    }
2407
2408    true
2409}
2410
2411/// Try to parse a multiline table starting at the given position.
2412/// Returns the number of lines consumed if successful.
2413pub(crate) fn try_parse_multiline_table(
2414    window: &StrippedLines<'_, '_>,
2415    builder: &mut GreenNodeBuilder<'static>,
2416    config: &ParserOptions,
2417) -> Option<usize> {
2418    let lines = window.raw();
2419    let start_pos = window.pos();
2420    if start_pos >= lines.len() {
2421        return None;
2422    }
2423
2424    // Detection scans run against the container-prefix-stripped view so a
2425    // multiline table nested in `list → blockquote` (e.g. `- > ----`) has its
2426    // `  > ` prefix removed before the separator/blank-row shape checks. The
2427    // interior `>`-only row then strips to `""` and registers as a blank row
2428    // separator. With an empty prefix `stripped == lines`. Emission re-emits
2429    // the prefix bytes as tokens via the window; captions read raw `lines`.
2430    let stripped = window.strip_all();
2431
2432    let first_line = stripped[start_pos];
2433
2434    // First line can be either:
2435    // 1. A full-width dash separator (for tables with headers)
2436    // 2. A column separator (for headerless tables)
2437    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2438    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2439    let headerless_columns = if is_column_sep_start {
2440        try_parse_table_separator(first_line)
2441    } else {
2442        None
2443    };
2444
2445    if !is_full_width_start && !is_column_sep_start {
2446        return None;
2447    }
2448
2449    // Look ahead to find the structure
2450    let mut pos = start_pos + 1;
2451    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2452    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2453    let mut has_header = false;
2454    let mut found_blank_line = false;
2455    let mut found_closing_sep = false;
2456    let mut content_line_count = 0usize;
2457
2458    // Scan for header section and column separator
2459    while pos < lines.len() {
2460        let line = stripped[pos];
2461
2462        // Check for column separator (defines columns) - only if we started with full-width
2463        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2464            found_column_sep = true;
2465            column_sep_pos = pos;
2466            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2467            pos += 1;
2468            continue;
2469        }
2470
2471        // Check for blank line (row separator in body)
2472        if line.trim().is_empty() {
2473            found_blank_line = true;
2474            pos += 1;
2475            // Check if next line is a valid closing separator for this table shape.
2476            if pos < lines.len() {
2477                let next = stripped[pos];
2478                let is_valid_closer = if is_full_width_start {
2479                    try_parse_multiline_separator(next).is_some()
2480                } else {
2481                    is_column_separator(next)
2482                };
2483                if is_valid_closer {
2484                    found_closing_sep = true;
2485                    pos += 1; // Include the closing separator
2486                    break;
2487                }
2488            }
2489            continue;
2490        }
2491
2492        // Check for closing full-width dashes (only for full-width-start tables).
2493        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2494            found_closing_sep = true;
2495            pos += 1;
2496            break;
2497        }
2498
2499        // Check for closing column separator (for headerless tables)
2500        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2501            found_closing_sep = true;
2502            pos += 1;
2503            break;
2504        }
2505
2506        // Content row
2507        content_line_count += 1;
2508        pos += 1;
2509    }
2510
2511    // Must have found a column separator to be a valid multiline table
2512    if !found_column_sep {
2513        return None;
2514    }
2515
2516    // Must have had at least one blank line between rows (distinguishes from simple tables)
2517    if !found_blank_line {
2518        if !is_column_sep_start {
2519            return None;
2520        }
2521        let columns = headerless_columns.as_deref()?;
2522        if !is_headerless_single_row_without_blank(&stripped, start_pos + 1, pos - 1, columns) {
2523            return None;
2524        }
2525    }
2526
2527    // Must have a closing separator
2528    if !found_closing_sep {
2529        return None;
2530    }
2531
2532    // Must have consumed more than just the opening separator
2533    if pos <= start_pos + 2 {
2534        return None;
2535    }
2536
2537    let end_pos = pos;
2538
2539    // Extract column boundaries from the separator line
2540    let columns = try_parse_table_separator(stripped[column_sep_pos])
2541        .expect("Column separator must be valid");
2542
2543    // Check for caption before table
2544    let caption_before = find_caption_before_table(&stripped, start_pos);
2545
2546    // Check for caption after table
2547    let caption_after = if caption_before.is_some() {
2548        None
2549    } else {
2550        find_caption_after_table(&stripped, end_pos)
2551    };
2552
2553    // Build the multiline table
2554    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2555
2556    // Emit caption before if present
2557    if let Some((cap_start, cap_end)) = caption_before {
2558        emit_table_caption(builder, lines, cap_start, cap_end, config);
2559
2560        // Emit blank line between caption and table if present
2561        if cap_end < start_pos {
2562            for line in lines.iter().take(start_pos).skip(cap_end) {
2563                if line.trim().is_empty() {
2564                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2565                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2566                    builder.finish_node();
2567                }
2568            }
2569        }
2570    }
2571
2572    // Emit opening separator. The dispatch line's prefix was already consumed
2573    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
2574    // re-emits its `  > ` prefix via `emit_prefix_at`.
2575    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2576    let tail = window.emit_or_dispatch_tail(builder, start_pos);
2577    emit_line_tokens(builder, tail);
2578    builder.finish_node();
2579
2580    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
2581    // up a multi-line row so each line's container prefix can be re-emitted via
2582    // the window.
2583    let mut in_header = has_header;
2584    let mut current_row_indices: Vec<usize> = Vec::new();
2585
2586    for (i, &line) in stripped
2587        .iter()
2588        .enumerate()
2589        .take(end_pos)
2590        .skip(start_pos + 1)
2591    {
2592        // Column separator (header/body divider)
2593        if i == column_sep_pos {
2594            // Emit any accumulated header lines
2595            if !current_row_indices.is_empty() {
2596                emit_multiline_table_row(
2597                    builder,
2598                    window,
2599                    &current_row_indices,
2600                    &columns,
2601                    SyntaxKind::TABLE_HEADER,
2602                    config,
2603                );
2604                current_row_indices.clear();
2605            }
2606
2607            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2608            let tail = window.emit_or_dispatch_tail(builder, i);
2609            emit_line_tokens(builder, tail);
2610            builder.finish_node();
2611            in_header = false;
2612            continue;
2613        }
2614
2615        // Closing separator (full-width or column separator at end)
2616        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2617            // Emit any accumulated row lines
2618            if !current_row_indices.is_empty() {
2619                let kind = if in_header {
2620                    SyntaxKind::TABLE_HEADER
2621                } else {
2622                    SyntaxKind::TABLE_ROW
2623                };
2624                emit_multiline_table_row(
2625                    builder,
2626                    window,
2627                    &current_row_indices,
2628                    &columns,
2629                    kind,
2630                    config,
2631                );
2632                current_row_indices.clear();
2633            }
2634
2635            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2636            let tail = window.emit_or_dispatch_tail(builder, i);
2637            emit_line_tokens(builder, tail);
2638            builder.finish_node();
2639            continue;
2640        }
2641
2642        // Blank line (row separator)
2643        if line.trim().is_empty() {
2644            // Emit accumulated row
2645            if !current_row_indices.is_empty() {
2646                let kind = if in_header {
2647                    SyntaxKind::TABLE_HEADER
2648                } else {
2649                    SyntaxKind::TABLE_ROW
2650                };
2651                emit_multiline_table_row(
2652                    builder,
2653                    window,
2654                    &current_row_indices,
2655                    &columns,
2656                    kind,
2657                    config,
2658                );
2659                current_row_indices.clear();
2660            }
2661
2662            // Re-emit the interior `>`-only separator row's container prefix
2663            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
2664            builder.start_node(SyntaxKind::BLANK_LINE.into());
2665            let tail = window.emit_or_dispatch_tail(builder, i);
2666            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
2667            builder.finish_node();
2668            continue;
2669        }
2670
2671        // Content line - accumulate for current row
2672        current_row_indices.push(i);
2673    }
2674
2675    // Emit any remaining accumulated lines
2676    if !current_row_indices.is_empty() {
2677        let kind = if in_header {
2678            SyntaxKind::TABLE_HEADER
2679        } else {
2680            SyntaxKind::TABLE_ROW
2681        };
2682        emit_multiline_table_row(
2683            builder,
2684            window,
2685            &current_row_indices,
2686            &columns,
2687            kind,
2688            config,
2689        );
2690    }
2691
2692    // Emit caption after if present
2693    if let Some((cap_start, cap_end)) = caption_after {
2694        if cap_start > end_pos {
2695            for line in lines.iter().take(cap_start).skip(end_pos) {
2696                if line.trim().is_empty() {
2697                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2698                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2699                    builder.finish_node();
2700                }
2701            }
2702        }
2703        emit_table_caption(builder, lines, cap_start, cap_end, config);
2704    }
2705
2706    builder.finish_node(); // MultilineTable
2707
2708    // Calculate lines consumed
2709    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2710    let table_end = if let Some((_, cap_end)) = caption_after {
2711        cap_end
2712    } else {
2713        end_pos
2714    };
2715
2716    Some(table_end - table_start)
2717}
2718
2719/// Extract cell contents from first line only (for CST emission).
2720/// Multi-line content will be in continuation TEXT tokens.
2721fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2722    let (line_content, _) = strip_newline(line);
2723    let mut cells = Vec::new();
2724
2725    for column in columns.iter() {
2726        let column_start = column_offset_to_byte_index(line_content, column.start);
2727        let column_end = column_offset_to_byte_index(line_content, column.end);
2728
2729        // Extract FULL text for this column (including whitespace)
2730        let cell_text = if column_start < column_end {
2731            &line_content[column_start..column_end]
2732        } else if column_start < line_content.len() {
2733            &line_content[column_start..]
2734        } else {
2735            ""
2736        };
2737
2738        cells.push(cell_text.to_string());
2739    }
2740
2741    cells
2742}
2743
2744/// Emit a multiline table row with inline parsing (Phase 7.1).
2745///
2746/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2747/// physical line re-emits its container prefix (`  > `) via the window before
2748/// its content. With an empty prefix the tails equal the raw lines, so emission
2749/// is byte-identical to the pre-window path.
2750fn emit_multiline_table_row(
2751    builder: &mut GreenNodeBuilder<'static>,
2752    window: &StrippedLines<'_, '_>,
2753    indices: &[usize],
2754    columns: &[Column],
2755    kind: SyntaxKind,
2756    config: &ParserOptions,
2757) {
2758    if indices.is_empty() {
2759        return;
2760    }
2761
2762    builder.start_node(kind.into());
2763
2764    // Emit the first line's container prefix as tokens, then slice cells from
2765    // the prefix-stripped tail (for CST losslessness, only the first physical
2766    // line is parsed into cells; continuation lines stay verbatim TEXT).
2767    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2768    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2769    let (trimmed, newline_str) = strip_newline(first_line);
2770    let mut current_pos = 0;
2771
2772    for (col_idx, column) in columns.iter().enumerate() {
2773        let cell_text = &cell_contents[col_idx];
2774        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2775        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2776
2777        // Emit whitespace before cell
2778        if current_pos < cell_start {
2779            builder.token(
2780                SyntaxKind::WHITESPACE.into(),
2781                &trimmed[current_pos..cell_start],
2782            );
2783        }
2784
2785        // Emit cell with inline parsing (first line content only)
2786        emit_table_cell(builder, cell_text, config);
2787
2788        current_pos = cell_end;
2789    }
2790
2791    // Emit trailing whitespace
2792    if current_pos < trimmed.len() {
2793        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2794    }
2795
2796    // Emit newline
2797    if !newline_str.is_empty() {
2798        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2799    }
2800
2801    // Emit continuation lines as TEXT to preserve exact line structure,
2802    // re-emitting each line's container prefix first.
2803    for &idx in &indices[1..] {
2804        let tail = window.emit_or_dispatch_tail(builder, idx);
2805        emit_line_tokens(builder, tail);
2806    }
2807
2808    builder.finish_node();
2809}
2810
#[cfg(test)]
mod multiline_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Run `try_parse_multiline_table` on `input`, starting at line 0 with a
    /// default `ContainerPrefix`, and return its result (lines consumed).
    fn parse_multiline(input: &[&str]) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(input, 0, &prefix);
        try_parse_multiline_table(&window, &mut builder, &ParserOptions::default())
    }

    /// Build a standalone syntax tree for `text` using `emit_table_cell` with
    /// default parser options.
    fn cell_node(text: &str) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        emit_table_cell(&mut builder, text, &ParserOptions::default());
        SyntaxNode::new_root(builder.finish())
    }

    /// Assert that `text` becomes a `TABLE_CELL` whose text round-trips and
    /// whose only child node has kind `expected`.
    #[track_caller]
    fn assert_single_inline_node(text: &str, expected: SyntaxKind) {
        let node = cell_node(text);

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), text);

        let children: Vec<_> = node.children().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), expected);
    }

    #[test]
    fn test_multiline_separator_detection() {
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----", // with leading spaces
        ];
        for line in accepted {
            assert!(
                try_parse_multiline_separator(line).is_some(),
                "expected a multiline separator: {:?}",
                line
            );
        }

        let rejected = [
            "--",      // too short
            "--- ---", // has spaces
            "+---+",   // grid separator
        ];
        for line in rejected {
            assert!(
                try_parse_multiline_separator(line).is_none(),
                "expected no multiline separator: {:?}",
                line
            );
        }
    }

    #[test]
    fn test_basic_multiline_table() {
        let input = [
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ];

        // Everything up to and including the closing separator is consumed.
        assert_eq!(parse_multiline(&input), Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        let input = [
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ];

        assert_eq!(parse_multiline(&input), Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        let input = [
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ];

        assert!(parse_multiline(&input).is_none());
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        let input = [
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ];

        assert_eq!(parse_multiline(&input), Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let input = [
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ];

        // table (6 lines) + blank + caption
        assert_eq!(parse_multiline(&input), Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let input = [
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ];

        assert_eq!(parse_multiline(&input), Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        let input = [
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ];

        assert!(parse_multiline(&input).is_none());
    }

    #[test]
    fn test_not_multiline_table() {
        // Simple table should not be parsed as multiline
        let input = [
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ];

        // The first line is plain header text rather than a dashed separator
        // line, so this must not be recognised as a multiline table.
        assert!(parse_multiline(&input).is_none());
    }

    // Phase 7.1: Unit tests for emit_table_cell() helper
    #[test]
    fn test_emit_table_cell_plain_text() {
        let node = cell_node("Cell");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "Cell");

        // Should have a single TEXT child
        let children: Vec<_> = node.children_with_tokens().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        // Should have a single EMPHASIS child
        assert_single_inline_node("*italic*", SyntaxKind::EMPHASIS);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        // Should have a single INLINE_CODE child
        assert_single_inline_node("`code`", SyntaxKind::INLINE_CODE);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        // Should have a single LINK child
        assert_single_inline_node("[text](url)", SyntaxKind::LINK);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        // Should have a single STRONG child
        assert_single_inline_node("**bold**", SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let node = cell_node("Text **bold** and `code`");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "Text **bold** and `code`");

        // Should have multiple children: TEXT, STRONG, TEXT, INLINE_CODE
        let children: Vec<_> = node.children_with_tokens().collect();
        assert!(children.len() >= 4);

        // Check some expected types
        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let node = cell_node("");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "");

        // Empty cell should have no children
        assert_eq!(node.children_with_tokens().count(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let node = cell_node(r"A \| B");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        // The escaped pipe should be preserved
        assert_eq!(node.text(), r"A \| B");
    }
}