Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Horizontal alignment of a table column.
///
/// For simple tables the value is derived by `determine_alignments` from how
/// the header text sits relative to the run of dashes underneath it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Header text is flush with the left edge of the dashes only.
    Left,
    /// Header text is flush with the right edge of the dashes only.
    Right,
    /// Header text is flush with neither edge of the dashes.
    Center,
    /// Header text is flush with both edges, the header cell is blank, or
    /// the table has no header row.
    Default,
}
23
/// Column information extracted from the separator line.
///
/// `start..end` is a half-open range covering one run of dashes in the
/// separator line, including any leading indentation of that line.
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index) in the separator line
    start: usize,
    /// End position (byte index, exclusive) in the separator line
    end: usize,
    /// Column alignment; `Alignment::Default` until `determine_alignments` runs
    alignment: Alignment,
}
34
35/// Try to detect if a line is a table separator line.
36/// Returns Some(column positions) if it's a valid separator.
37pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
38    let trimmed = line.trim_start();
39    // Strip trailing newline if present (CRLF or LF)
40    let (trimmed, newline_str) = strip_newline(trimmed);
41    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
42
43    // Must have leading spaces <= 3 to not be a code block
44    if leading_spaces > 3 {
45        return None;
46    }
47
48    // Simple tables only use dashed separators.
49    if trimmed.contains('*') || trimmed.contains('_') {
50        return None;
51    }
52
53    // Must contain at least one dash
54    if !trimmed.contains('-') {
55        return None;
56    }
57
58    // A separator line consists of dashes and spaces
59    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
60        return None;
61    }
62
63    // Must not be a horizontal rule.
64    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
65    if dash_groups.len() <= 1 {
66        return None;
67    }
68
69    // Extract column positions from dash groups
70    let columns = extract_columns(trimmed, leading_spaces);
71
72    if columns.is_empty() {
73        return None;
74    }
75
76    Some(columns)
77}
78
79/// Extract column positions from a separator line.
80fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
81    let mut columns = Vec::new();
82    let mut in_dashes = false;
83    let mut col_start = 0;
84
85    for (i, ch) in separator.char_indices() {
86        match ch {
87            '-' if !in_dashes => {
88                col_start = i + offset;
89                in_dashes = true;
90            }
91            ' ' if in_dashes => {
92                columns.push(Column {
93                    start: col_start,
94                    end: i + offset,
95                    alignment: Alignment::Default, // Will be determined later
96                });
97                in_dashes = false;
98            }
99            _ => {}
100        }
101    }
102
103    // Handle last column
104    if in_dashes {
105        columns.push(Column {
106            start: col_start,
107            end: separator.len() + offset,
108            alignment: Alignment::Default,
109        });
110    }
111
112    columns
113}
114
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Column boundaries are measured on the ASCII separator line, where
/// character and byte offsets coincide. Header and data rows can hold
/// multibyte characters, so the offset has to be translated before it is
/// used to slice them. Offsets past the end of `line` clamp to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
125
/// Recognise a table-caption marker at the start of `line`.
///
/// Accepted markers are `Table:`, `table:` and a bare `:` that is followed
/// by whitespace. Returns `Some((prefix_len, rest))`, where `prefix_len`
/// counts the indentation plus the marker and `rest` is everything after the
/// marker. Lines indented by more than three characters are never captions.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation belongs to a code block.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare ":" only counts when whitespace follows (Pandoc-style); this
    // keeps constructs such as the fenced-div fence ":::" from matching.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
154
155/// Check if a line could be the start of a table caption.
156fn is_table_caption_start(line: &str) -> bool {
157    try_parse_caption_prefix(line).is_some()
158}
159
/// Whether `line`, ignoring indentation, starts with exactly one colon,
/// i.e. a `:` that is not immediately followed by another `:`.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
164
165fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
166    let Some((_, rest)) = try_parse_caption_prefix(line) else {
167        return false;
168    };
169    let trimmed = rest.trim_start();
170    trimmed.starts_with("```") || trimmed.starts_with("~~~")
171}
172
/// Whether `line` is a fenced-div fence: optional indentation, at least
/// three colons, and then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let after_indent = line.trim_start();
    let after_colons = after_indent.trim_start_matches(':');
    let colon_count = after_indent.len() - after_colons.len();

    colon_count >= 3
        && (after_colons.is_empty() || after_colons.starts_with(char::is_whitespace))
}
182
183fn is_valid_caption_start_before_table(lines: &[&str], pos: usize) -> bool {
184    if !is_table_caption_start(lines[pos]) {
185        return false;
186    }
187
188    if is_bare_colon_caption_start(lines[pos])
189        && bare_colon_caption_looks_like_definition_code_block(lines[pos])
190    {
191        return false;
192    }
193
194    // Avoid stealing definition-list definitions (":   ...") as table captions.
195    if is_bare_colon_caption_start(lines[pos])
196        && pos > 0
197        && !lines[pos - 1].trim().is_empty()
198        && !line_is_fenced_div_fence(lines[pos - 1])
199    {
200        return false;
201    }
202    true
203}
204
205/// Check if a line could be the start of a grid table.
206/// Grid tables start with a separator line like +---+---+ or +===+===+
207fn is_grid_table_start(line: &str) -> bool {
208    try_parse_grid_separator(line).is_some()
209}
210
211/// Check if a line could be the start of a multiline table.
212/// Multiline tables start with either:
213/// - A full-width dash separator (----)
214/// - A column separator with dashes and spaces (---- ---- ----)
215fn is_multiline_table_start(line: &str) -> bool {
216    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
217}
218
219/// Check if there's a table following a potential caption at this position.
220/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
221pub(crate) fn is_caption_followed_by_table(lines: &[&str], caption_pos: usize) -> bool {
222    if caption_pos >= lines.len() {
223        return false;
224    }
225
226    // Caption must start with a caption prefix
227    if !is_valid_caption_start_before_table(lines, caption_pos) {
228        return false;
229    }
230
231    let mut pos = caption_pos + 1;
232
233    // Skip continuation lines of caption (non-blank lines).
234    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
235    // must not be folded into the caption.
236    while pos < lines.len()
237        && !lines[pos].trim().is_empty()
238        && !line_is_fenced_div_fence(lines[pos])
239    {
240        // If we hit a table separator, we found a table
241        if try_parse_table_separator(lines[pos]).is_some() {
242            return true;
243        }
244        pos += 1;
245    }
246
247    // Skip one blank line
248    if pos < lines.len() && lines[pos].trim().is_empty() {
249        pos += 1;
250    }
251
252    // Check for table at next position
253    if pos < lines.len() {
254        let line = lines[pos];
255
256        // Check for grid table start (+---+---+ or +===+===+)
257        if is_grid_table_start(line) {
258            return true;
259        }
260
261        // Check for multiline table start (---- or ---- ---- ----)
262        if is_multiline_table_start(line) {
263            return true;
264        }
265
266        // Could be a separator line (simple/pipe table, headerless)
267        if try_parse_table_separator(line).is_some() {
268            return true;
269        }
270
271        // Or could be a header line followed by separator (simple/pipe table with header)
272        if pos + 1 < lines.len() && !line.trim().is_empty() {
273            let next_line = lines[pos + 1];
274            if try_parse_table_separator(next_line).is_some()
275                || try_parse_pipe_separator(next_line).is_some()
276            {
277                return true;
278            }
279        }
280    }
281
282    false
283}
284
285fn caption_range_starting_at(lines: &[&str], start: usize) -> Option<(usize, usize)> {
286    if start >= lines.len() || !is_table_caption_start(lines[start]) {
287        return None;
288    }
289    let mut end = start + 1;
290    while end < lines.len()
291        && !lines[end].trim().is_empty()
292        && !line_is_fenced_div_fence(lines[end])
293    {
294        end += 1;
295    }
296    Some((start, end))
297}
298
299/// Find caption before table (if any).
300/// Returns (caption_start, caption_end) positions, or None.
301fn find_caption_before_table(lines: &[&str], table_start: usize) -> Option<(usize, usize)> {
302    if table_start == 0 {
303        return None;
304    }
305
306    // Look backward for a caption
307    // Caption must be immediately before table (with possible blank line between)
308    let mut pos = table_start - 1;
309
310    // Skip one blank line if present
311    if lines[pos].trim().is_empty() {
312        if pos == 0 {
313            return None;
314        }
315        pos -= 1;
316    }
317
318    // Now pos points to the last non-blank line before the table
319    // This could be the last line of a multiline caption, or a single-line caption
320    let caption_end = pos + 1; // End is exclusive
321
322    // If this line is NOT a caption start, it might be a continuation line
323    // Scan backward through non-blank lines to find the caption start
324    if !is_valid_caption_start_before_table(lines, pos) {
325        // Not a caption start - check if there's a caption start above
326        let mut scan_pos = pos;
327        while scan_pos > 0 {
328            scan_pos -= 1;
329            let line = lines[scan_pos];
330
331            // If we hit a blank line or fenced-div fence, we've gone too far
332            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
333                return None;
334            }
335
336            // If we find a caption start, this is the beginning of the multiline caption
337            if is_valid_caption_start_before_table(lines, scan_pos) {
338                if scan_pos > 0 && !lines[scan_pos - 1].trim().is_empty() {
339                    return None;
340                }
341                if previous_nonblank_looks_like_table(lines, scan_pos) {
342                    return None;
343                }
344                return Some((scan_pos, caption_end));
345            }
346        }
347        // Scanned to beginning without finding caption start
348        None
349    } else {
350        if pos > 0 && !lines[pos - 1].trim().is_empty() {
351            return None;
352        }
353        if previous_nonblank_looks_like_table(lines, pos) {
354            return None;
355        }
356        // This line is a caption start - return the range
357        Some((pos, caption_end))
358    }
359}
360
361fn previous_nonblank_looks_like_table(lines: &[&str], pos: usize) -> bool {
362    if pos == 0 {
363        return false;
364    }
365    let mut i = pos;
366    while i > 0 {
367        i -= 1;
368        let line = lines[i].trim();
369        if line.is_empty() {
370            continue;
371        }
372        return line_looks_like_table_syntax(line);
373    }
374    false
375}
376
377fn line_looks_like_table_syntax(line: &str) -> bool {
378    if line.starts_with('|') && line.matches('|').count() >= 2 {
379        return true;
380    }
381    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
382        return true;
383    }
384    try_parse_table_separator(line).is_some()
385        || try_parse_pipe_separator(line).is_some()
386        || try_parse_grid_separator(line).is_some()
387}
388
389/// Find caption after table (if any).
390/// Returns (caption_start, caption_end) positions, or None.
391fn find_caption_after_table(lines: &[&str], table_end: usize) -> Option<(usize, usize)> {
392    if table_end >= lines.len() {
393        return None;
394    }
395
396    let mut pos = table_end;
397
398    // Skip one blank line if present
399    if pos < lines.len() && lines[pos].trim().is_empty() {
400        pos += 1;
401    }
402
403    if pos >= lines.len() {
404        return None;
405    }
406
407    // Check if this line is a caption
408    if is_table_caption_start(lines[pos]) {
409        let caption_start = pos;
410        // Find end of caption (continues until blank line or fenced-div fence)
411        let mut caption_end = caption_start + 1;
412        while caption_end < lines.len()
413            && !lines[caption_end].trim().is_empty()
414            && !line_is_fenced_div_fence(lines[caption_end])
415        {
416            caption_end += 1;
417        }
418        Some((caption_start, caption_end))
419    } else {
420        None
421    }
422}
423
424/// Emit a table caption node.
425/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
426/// the text ends with a balanced `{...}` block, lift it into a structural
427/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
428/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
429/// the id).
430fn emit_caption_line_text(
431    builder: &mut GreenNodeBuilder<'static>,
432    text_with_newline: &str,
433    config: &ParserOptions,
434    lift_trailing_attrs: bool,
435) {
436    let (text, newline_str) = strip_newline(text_with_newline);
437
438    if lift_trailing_attrs
439        && !text.is_empty()
440        && let Some((_attrs, before_attrs, start_brace_pos)) =
441            try_parse_trailing_attributes_with_pos(text)
442    {
443        let trimmed_len = text.trim_end().len();
444        let space = &text[before_attrs.len()..start_brace_pos];
445        let raw_attrs = &text[start_brace_pos..trimmed_len];
446        let trailing_ws = &text[trimmed_len..];
447
448        if !before_attrs.is_empty() {
449            inline_emission::emit_inlines(builder, before_attrs, config, false);
450        }
451        if !space.is_empty() {
452            builder.token(SyntaxKind::WHITESPACE.into(), space);
453        }
454        emit_attribute_node(builder, raw_attrs);
455        if !trailing_ws.is_empty() {
456            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
457        }
458        if !newline_str.is_empty() {
459            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
460        }
461        return;
462    }
463
464    if !text.is_empty() {
465        inline_emission::emit_inlines(builder, text, config, false);
466    }
467    if !newline_str.is_empty() {
468        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
469    }
470}
471
472fn emit_table_caption(
473    builder: &mut GreenNodeBuilder<'static>,
474    lines: &[&str],
475    start: usize,
476    end: usize,
477    config: &ParserOptions,
478) {
479    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
480
481    let last_idx = (end - start).saturating_sub(1);
482
483    for (i, line) in lines[start..end].iter().enumerate() {
484        let lift_attrs = i == last_idx;
485        if i == 0 {
486            // First line - parse and emit prefix separately
487            let trimmed = line.trim_start();
488            let leading_ws_len = line.len() - trimmed.len();
489
490            // Emit leading whitespace if present
491            if leading_ws_len > 0 {
492                builder.token(SyntaxKind::WHITESPACE.into(), &line[..leading_ws_len]);
493            }
494
495            // Check for caption prefix and emit separately
496            // Calculate where the prefix ends (after trimmed content)
497            let prefix_and_rest = if line.ends_with('\n') {
498                &line[leading_ws_len..line.len() - 1] // Exclude newline
499            } else {
500                &line[leading_ws_len..]
501            };
502
503            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
504                (7, "Table: ")
505            } else if prefix_and_rest.starts_with("table: ") {
506                (7, "table: ")
507            } else if prefix_and_rest.starts_with(": ") {
508                (2, ": ")
509            } else if prefix_and_rest.starts_with(':') {
510                (1, ":")
511            } else {
512                (0, "")
513            };
514
515            if prefix_len > 0 {
516                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
517
518                // Emit rest of line after prefix
519                let rest_start = leading_ws_len + prefix_len;
520                if rest_start < line.len() {
521                    emit_caption_line_text(builder, &line[rest_start..], config, lift_attrs);
522                }
523            } else {
524                // No recognized prefix, emit whole trimmed line
525                emit_caption_line_text(builder, &line[leading_ws_len..], config, lift_attrs);
526            }
527        } else {
528            // Continuation lines - emit with inline parsing (attrs only on last line).
529            emit_caption_line_text(builder, line, config, lift_attrs);
530        }
531    }
532
533    builder.finish_node(); // TABLE_CAPTION
534}
535
536/// Emit a table cell with inline content parsing.
537/// This is the core helper for Phase 7.1 table inline parsing migration.
538fn emit_table_cell(
539    builder: &mut GreenNodeBuilder<'static>,
540    cell_text: &str,
541    config: &ParserOptions,
542) {
543    builder.start_node(SyntaxKind::TABLE_CELL.into());
544
545    // Parse inline content within the cell
546    if !cell_text.is_empty() {
547        inline_emission::emit_inlines(builder, cell_text, config, false);
548    }
549
550    builder.finish_node(); // TABLE_CELL
551}
552
553/// Determine column alignments based on separator and optional header.
554fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
555    for col in columns.iter_mut() {
556        let sep_slice = &separator_line[col.start..col.end];
557
558        if let Some(header) = header_line {
559            let header_start = column_offset_to_byte_index(header, col.start);
560            let header_end = column_offset_to_byte_index(header, col.end);
561
562            // Extract header text for this column
563            let header_text = if header_start < header_end {
564                header[header_start..header_end].trim()
565            } else if header_start < header.len() {
566                header[header_start..].trim()
567            } else {
568                ""
569            };
570
571            if header_text.is_empty() {
572                col.alignment = Alignment::Default;
573                continue;
574            }
575
576            // Find where the header text starts and ends within the column
577            let header_in_col = &header[header_start..header_end];
578            let text_start = header_in_col.len() - header_in_col.trim_start().len();
579            let text_end = header_in_col.trim_end().len() + text_start;
580
581            // Check dash alignment relative to text
582            let dashes_start = 0; // Dashes start at beginning of sep_slice
583            let dashes_end = sep_slice.len();
584
585            let flush_left = dashes_start == text_start;
586            let flush_right = dashes_end == text_end;
587
588            col.alignment = match (flush_left, flush_right) {
589                (true, true) => Alignment::Default,
590                (true, false) => Alignment::Left,
591                (false, true) => Alignment::Right,
592                (false, false) => Alignment::Center,
593            };
594        } else {
595            // Without header, alignment based on first row (we'll handle this later)
596            col.alignment = Alignment::Default;
597        }
598    }
599}
600
/// Try to parse a simple table starting at the given position.
/// Returns the number of lines consumed if successful.
///
/// `window` supplies both the raw line buffer and the container-prefix-
/// stripped view of it. Detection (separator, columns, table end, captions)
/// runs on the stripped view; caption and blank-line emission read the raw
/// lines, and row/separator emission goes through the window so container
/// prefixes on continuation lines are re-emitted as tokens.
///
/// Returns `None`, without touching `builder`, when no separator is found on
/// the dispatch line or the line after it, or when the separator is not
/// followed by at least one row.
///
/// On success a `SIMPLE_TABLE` node is emitted containing, in order: an
/// optional leading caption (plus blank lines), an optional header row, the
/// separator, the data rows, and an optional trailing caption (plus blank
/// lines). A trailing caption is only looked for when no leading caption was
/// found.
pub(crate) fn try_parse_simple_table(
    window: &StrippedLines<'_, '_>,
    builder: &mut GreenNodeBuilder<'static>,
    config: &ParserOptions,
) -> Option<usize> {
    let lines = window.raw();
    let start_pos = window.pos();
    log::trace!("try_parse_simple_table at line {}", start_pos + 1);

    if start_pos >= lines.len() {
        return None;
    }

    // Cheap gate before the O(buffer) `strip_all` below: a simple table's
    // separator must sit on the dispatch line or the line just after it (see
    // `find_separator_line`). Table detection runs at every block start, so
    // stripping the whole line buffer for every prose/math paragraph that
    // can't be a table was quadratic on large documents. Peek just those one
    // or two lines via `strip_at` and bail before materializing the full view.
    let gate_first = window.strip_at(start_pos);
    let separator_here = try_parse_table_separator(gate_first).is_some();
    let separator_next = !separator_here
        && start_pos + 1 < lines.len()
        && !gate_first.trim().is_empty()
        && try_parse_table_separator(window.strip_at(start_pos + 1)).is_some();
    if !separator_here && !separator_next {
        return None;
    }

    // Detection scans run against the container-prefix-stripped view so a
    // table nested in `list → blockquote` (e.g. `- >  a   b`) has its `  > `
    // prefix removed before the separator/column-shape checks. With an empty
    // prefix `stripped == lines`. Emission re-emits the prefix bytes as
    // tokens via the window; captions/blank lines still read raw `lines`.
    let stripped = window.strip_all();

    // Look for a separator line
    let separator_pos = find_separator_line(&stripped, start_pos)?;
    log::trace!("  found separator at line {}", separator_pos + 1);

    let separator_line = stripped[separator_pos];
    let mut columns = try_parse_table_separator(separator_line)?;

    // Determine if there's a header (separator not at start)
    let has_header = separator_pos > start_pos;
    let header_line = if has_header {
        Some(stripped[separator_pos - 1])
    } else {
        None
    };

    // Determine alignments
    determine_alignments(&mut columns, separator_line, header_line);

    // Find table end (blank line or end of input)
    let end_pos = find_table_end(&stripped, separator_pos + 1);

    // Must have at least one data row (or it's just a separator).
    // `find_table_end` never returns less than its start argument, so this
    // subtraction cannot underflow.
    let data_rows = end_pos - separator_pos - 1;

    if data_rows == 0 {
        return None;
    }

    // Check for caption before table
    let caption_before = find_caption_before_table(&stripped, start_pos);

    // Check for caption after table (only one caption is attached per table)
    let caption_after = if caption_before.is_some() {
        None
    } else {
        find_caption_after_table(&stripped, end_pos)
    };

    // Build the table. From here on the function always succeeds, so it is
    // safe to start emitting into the builder.
    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());

    // Emit caption before if present
    if let Some((cap_start, cap_end)) = caption_before {
        emit_table_caption(builder, lines, cap_start, cap_end, config);

        // Emit blank line between caption and table if present
        if cap_end < start_pos {
            for line in lines.iter().take(start_pos).skip(cap_end) {
                if line.trim().is_empty() {
                    builder.start_node(SyntaxKind::BLANK_LINE.into());
                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
                    builder.finish_node();
                }
            }
        }
    }

    // Emit header if present. On the dispatch line the core already emitted
    // the container prefix; only continuation rows re-emit it (via the window
    // inside `emit_table_row`).
    if has_header {
        emit_table_row(
            builder,
            window,
            separator_pos - 1,
            &columns,
            SyntaxKind::TABLE_HEADER,
            config,
        );
    }

    // Emit separator, re-emitting any continuation-line container prefix
    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
    emit_line_tokens(builder, separator_tail);
    builder.finish_node();

    // Emit data rows (always continuation lines)
    for idx in (separator_pos + 1)..end_pos {
        emit_table_row(
            builder,
            window,
            idx,
            &columns,
            SyntaxKind::TABLE_ROW,
            config,
        );
    }

    // Emit caption after if present
    if let Some((cap_start, cap_end)) = caption_after {
        // Emit blank line before caption if needed
        if cap_start > end_pos {
            for line in lines.iter().take(cap_start).skip(end_pos) {
                if line.trim().is_empty() {
                    builder.start_node(SyntaxKind::BLANK_LINE.into());
                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
                    builder.finish_node();
                }
            }
        }
        emit_table_caption(builder, lines, cap_start, cap_end, config);
    }

    builder.finish_node(); // SimpleTable

    // Calculate lines consumed (including captions).
    // NOTE(review): with a leading caption the count starts at the caption,
    // which lies before `start_pos` — confirm the caller expects that.
    let table_start = if let Some((cap_start, _)) = caption_before {
        cap_start
    } else if has_header {
        separator_pos - 1
    } else {
        separator_pos
    };

    let table_end = if let Some((_, cap_end)) = caption_after {
        cap_end
    } else {
        end_pos
    };

    let lines_consumed = table_end - table_start;

    Some(lines_consumed)
}
765
766/// Find the position of a separator line starting from pos.
767fn find_separator_line(lines: &[&str], start_pos: usize) -> Option<usize> {
768    log::trace!("  find_separator_line from line {}", start_pos + 1);
769
770    // Check first line
771    log::trace!("    checking first line: {:?}", lines[start_pos]);
772    if try_parse_table_separator(lines[start_pos]).is_some() {
773        log::trace!("    separator found at first line");
774        return Some(start_pos);
775    }
776
777    // Check second line (for table with header)
778    if start_pos + 1 < lines.len()
779        && !lines[start_pos].trim().is_empty()
780        && try_parse_table_separator(lines[start_pos + 1]).is_some()
781    {
782        return Some(start_pos + 1);
783    }
784    None
785}
786
787/// Find where the table ends (first blank line or end of input).
788fn find_table_end(lines: &[&str], start_pos: usize) -> usize {
789    for i in start_pos..lines.len() {
790        if lines[i].trim().is_empty() {
791            return i;
792        }
793        // Check if this could be a closing separator
794        if try_parse_table_separator(lines[i]).is_some() {
795            // Check if next line is blank or end
796            if i + 1 >= lines.len() || lines[i + 1].trim().is_empty() {
797                return i + 1;
798            }
799        }
800    }
801    lines.len()
802}
803
804/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
805/// Uses column boundaries from the separator line to extract cells.
806fn emit_table_row(
807    builder: &mut GreenNodeBuilder<'static>,
808    window: &StrippedLines<'_, '_>,
809    abs_idx: usize,
810    columns: &[Column],
811    row_kind: SyntaxKind,
812    config: &ParserOptions,
813) {
814    builder.start_node(row_kind.into());
815
816    // On continuation lines the leading `  > ` prefix is re-emitted as
817    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
818    // stripped tail returned; the dispatch line just strips its (already
819    // core-emitted) prefix. Empty prefix ⇒ the raw line.
820    let line = window.emit_or_dispatch_tail(builder, abs_idx);
821
822    let (line_without_newline, newline_str) = strip_newline(line);
823
824    // Emit leading whitespace if present
825    let trimmed = line_without_newline.trim_start();
826    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
827    if leading_ws_len > 0 {
828        builder.token(
829            SyntaxKind::WHITESPACE.into(),
830            &line_without_newline[..leading_ws_len],
831        );
832    }
833
834    // Track where we are in the line (for losslessness)
835    let mut current_pos = 0;
836
837    // Extract and emit cells based on column boundaries
838    for col in columns.iter() {
839        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
840        let cell_start = if col.start >= leading_ws_len {
841            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
842        } else {
843            0
844        };
845
846        let cell_end = if col.end >= leading_ws_len {
847            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
848        } else {
849            0
850        };
851
852        // Extract cell text from column bounds. When the column lies entirely
853        // before the trimmed content (col.end <= leading_ws_len) both bounds
854        // clamp to 0; treat that as an empty cell rather than re-emitting the
855        // whole row.
856        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
857            &trimmed[cell_start..cell_end]
858        } else {
859            ""
860        };
861
862        let cell_content = cell_text.trim();
863        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
864
865        // Emit any whitespace from current position to start of cell content
866        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
867        if current_pos < content_abs_pos {
868            builder.token(
869                SyntaxKind::WHITESPACE.into(),
870                &trimmed[current_pos..content_abs_pos],
871            );
872        }
873
874        // Emit cell with inline parsing
875        emit_table_cell(builder, cell_content, config);
876
877        // Update current position to end of cell content
878        current_pos = content_abs_pos + cell_content.len();
879    }
880
881    // Emit any remaining whitespace after last cell
882    if current_pos < trimmed.len() {
883        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
884    }
885
886    // Emit newline if present
887    if !newline_str.is_empty() {
888        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
889    }
890
891    builder.finish_node();
892}
893
894// ============================================================================
895// Pipe Table Parsing
896// ============================================================================
897
898/// Check if a line is a pipe table separator line.
899/// Returns the column alignments if it's a valid separator.
900fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
901    let trimmed = line.trim();
902
903    // Must contain at least one pipe
904    if !trimmed.contains('|') && !trimmed.contains('+') {
905        return None;
906    }
907
908    // Split by pipes (or + for orgtbl variant)
909    let cells: Vec<&str> = if trimmed.contains('+') {
910        // Orgtbl variant: use + as separator in separator line
911        trimmed.split(['|', '+']).collect()
912    } else {
913        trimmed.split('|').collect()
914    };
915
916    let mut alignments = Vec::new();
917
918    for cell in cells {
919        let cell = cell.trim();
920
921        // Skip empty cells (from leading/trailing pipes)
922        if cell.is_empty() {
923            continue;
924        }
925
926        // Must be dashes with optional colons
927        let starts_colon = cell.starts_with(':');
928        let ends_colon = cell.ends_with(':');
929
930        // Remove colons to check if rest is all dashes
931        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
932
933        // Must have at least one dash
934        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
935            return None;
936        }
937
938        // Determine alignment from colon positions
939        let alignment = match (starts_colon, ends_colon) {
940            (true, true) => Alignment::Center,
941            (true, false) => Alignment::Left,
942            (false, true) => Alignment::Right,
943            (false, false) => Alignment::Default,
944        };
945
946        alignments.push(alignment);
947    }
948
949    // Must have at least one column
950    if alignments.is_empty() {
951        None
952    } else {
953        Some(alignments)
954    }
955}
956
/// Split a pipe table row into its trimmed cell texts.
///
/// A pipe that directly follows a backslash (`\|`) is kept inside the cell
/// (backslash included) instead of acting as a cell boundary. A pipe in the
/// very first position of the trimmed line is treated as the row's opening
/// border and produces no cell. Text after the final boundary becomes a cell
/// only when it is non-empty, so a closing border pipe adds no trailing cell.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let row = line.trim();
    // Drop the opening border pipe, if there is one.
    let body = row.strip_prefix('|').unwrap_or(row);

    let mut cells: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut after_backslash = false;

    for ch in body.chars() {
        if ch == '|' && !after_backslash {
            // Unescaped pipe: close the current cell and start the next one.
            cells.push(pending.trim().to_string());
            pending.clear();
        } else {
            pending.push(ch);
        }
        after_backslash = ch == '\\';
    }

    // Whatever follows the last boundary is a cell unless it is blank.
    let tail = pending.trim();
    if !tail.is_empty() {
        cells.push(tail.to_string());
    }

    cells
}
1004
1005/// Emit a pipe table row with inline-parsed cells.
1006/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
1007fn emit_pipe_table_row(
1008    builder: &mut GreenNodeBuilder<'static>,
1009    window: &StrippedLines<'_, '_>,
1010    abs_idx: usize,
1011    row_kind: SyntaxKind,
1012    config: &ParserOptions,
1013) {
1014    builder.start_node(row_kind.into());
1015
1016    // On continuation lines (separator/data rows under a list+blockquote
1017    // container) the leading `  > ` prefix is not consumed by the core;
1018    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1019    // and returns the stripped tail. On the dispatch line the core already
1020    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1021    // With an empty prefix (non-nested tables) both are no-ops returning
1022    // the raw line.
1023    let line = if abs_idx == window.dispatch_pos() {
1024        window.dispatch_tail()
1025    } else {
1026        window.emit_prefix_at(builder, abs_idx)
1027    };
1028
1029    let (line_without_newline, newline_str) = strip_newline(line);
1030    let trimmed = line_without_newline.trim();
1031
1032    // Parse cell boundaries
1033    let mut cell_starts = Vec::new();
1034    let mut cell_ends = Vec::new();
1035    let mut in_escape = false;
1036
1037    // Find all pipe positions (excluding escaped ones)
1038    let mut pipe_positions = Vec::new();
1039    for (i, ch) in trimmed.char_indices() {
1040        if in_escape {
1041            in_escape = false;
1042            continue;
1043        }
1044        if ch == '\\' {
1045            in_escape = true;
1046            continue;
1047        }
1048        if ch == '|' {
1049            pipe_positions.push(i);
1050        }
1051    }
1052
1053    // Determine cell boundaries based on pipe positions
1054    if pipe_positions.is_empty() {
1055        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1056        cell_starts.push(0);
1057        cell_ends.push(trimmed.len());
1058    } else {
1059        // Check if line starts with pipe
1060        let start_pipe = pipe_positions.first() == Some(&0);
1061        // Check if line ends with pipe
1062        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1063
1064        if start_pipe {
1065            // Skip first pipe
1066            for i in 1..pipe_positions.len() {
1067                cell_starts.push(pipe_positions[i - 1] + 1);
1068                cell_ends.push(pipe_positions[i]);
1069            }
1070            // Add last cell if there's no trailing pipe
1071            if !end_pipe {
1072                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1073                cell_ends.push(trimmed.len());
1074            }
1075        } else {
1076            // No leading pipe
1077            cell_starts.push(0);
1078            cell_ends.push(pipe_positions[0]);
1079
1080            for i in 1..pipe_positions.len() {
1081                cell_starts.push(pipe_positions[i - 1] + 1);
1082                cell_ends.push(pipe_positions[i]);
1083            }
1084
1085            // Add last cell if there's no trailing pipe
1086            if !end_pipe {
1087                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1088                cell_ends.push(trimmed.len());
1089            }
1090        }
1091    }
1092
1093    // Emit leading whitespace if present (before trim)
1094    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1095    if leading_ws_len > 0 {
1096        builder.token(
1097            SyntaxKind::WHITESPACE.into(),
1098            &line_without_newline[..leading_ws_len],
1099        );
1100    }
1101
1102    // Emit cells with pipes
1103    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1104        // Emit pipe before cell (except for first cell if no leading pipe)
1105        if *start > 0 {
1106            builder.token(SyntaxKind::TEXT.into(), "|");
1107        } else if idx == 0 && trimmed.starts_with('|') {
1108            // Leading pipe
1109            builder.token(SyntaxKind::TEXT.into(), "|");
1110        }
1111
1112        // Get cell content with its whitespace
1113        let cell_with_ws = &trimmed[*start..*end];
1114        let cell_content = cell_with_ws.trim();
1115
1116        // Emit leading whitespace within cell
1117        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1118        if !cell_leading_ws.is_empty() {
1119            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1120        }
1121
1122        // Emit cell with inline parsing
1123        emit_table_cell(builder, cell_content, config);
1124
1125        // Emit trailing whitespace within cell
1126        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1127        if cell_trailing_ws_start < cell_with_ws.len() {
1128            builder.token(
1129                SyntaxKind::WHITESPACE.into(),
1130                &cell_with_ws[cell_trailing_ws_start..],
1131            );
1132        }
1133    }
1134
1135    // Emit trailing pipe if present
1136    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1137        builder.token(SyntaxKind::TEXT.into(), "|");
1138    }
1139
1140    // Emit trailing whitespace after trim (before newline)
1141    let trailing_ws_start = leading_ws_len + trimmed.len();
1142    if trailing_ws_start < line_without_newline.len() {
1143        builder.token(
1144            SyntaxKind::WHITESPACE.into(),
1145            &line_without_newline[trailing_ws_start..],
1146        );
1147    }
1148
1149    // Emit newline
1150    if !newline_str.is_empty() {
1151        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1152    }
1153
1154    builder.finish_node();
1155}
1156
1157/// Try to parse a pipe table starting at the given position.
1158/// Returns the number of lines consumed if successful.
1159pub(crate) fn try_parse_pipe_table(
1160    window: &StrippedLines<'_, '_>,
1161    builder: &mut GreenNodeBuilder<'static>,
1162    config: &ParserOptions,
1163) -> Option<usize> {
1164    let lines = window.raw();
1165    let start_pos = window.pos();
1166    if start_pos + 1 >= lines.len() {
1167        return None;
1168    }
1169
1170    // Cheap gate before the O(buffer) `strip_all` below: a pipe table's first
1171    // line must contain a `|` (it is either the header or, headerless, the
1172    // delimiter row), unless this is a caption-led table. Table detection runs
1173    // at every block start, so stripping the whole line buffer for every
1174    // prose/math paragraph was quadratic on large documents. When there is no
1175    // container prefix (the common case) `strip_at`/`is_caption_followed_by_table`
1176    // see exactly the same bytes as the stripped view, so this peek is
1177    // equivalent to the checks below; with a non-empty prefix we skip the gate
1178    // and fall through rather than risk a mismatch.
1179    if window.prefix().ops().is_empty()
1180        && !window.strip_at(start_pos).contains('|')
1181        && !is_caption_followed_by_table(lines, start_pos)
1182    {
1183        return None;
1184    }
1185
1186    // Detection scans run against a container-prefix-stripped view, so a
1187    // table nested in `list → blockquote` (e.g. `- > | a | b |`) has its
1188    // `  > ` prefix removed before the separator/cell shape checks. Each
1189    // entry is a no-alloc tail slice of the matching raw line; with an
1190    // empty prefix `stripped == lines`. The dispatch line uses the
1191    // emission-safe line-0 strip (its prefix was consumed by the core);
1192    // every other line gets the full continuation strip. Emission still
1193    // reads raw `lines` so the prefix bytes can be re-emitted as tokens.
1194    let stripped = window.strip_all();
1195
1196    // Check if this line is a caption followed by a table
1197    // If so, the actual table starts after the caption and blank line
1198    let (actual_start, caption_before) = if is_caption_followed_by_table(&stripped, start_pos) {
1199        let (cap_start, cap_end) = caption_range_starting_at(&stripped, start_pos)?;
1200        let mut pos = cap_end;
1201        while pos < stripped.len() && stripped[pos].trim().is_empty() {
1202            pos += 1;
1203        }
1204        (pos, Some((cap_start, cap_end)))
1205    } else {
1206        (start_pos, None)
1207    };
1208
1209    if actual_start + 1 >= lines.len() {
1210        return None;
1211    }
1212
1213    // First line should have pipes (potential header)
1214    if !stripped[actual_start].contains('|') {
1215        return None;
1216    }
1217
1218    // Second line should be separator
1219    let alignments = try_parse_pipe_separator(stripped[actual_start + 1])?;
1220
1221    // Parse header cells
1222    let header_cells = parse_pipe_table_row(stripped[actual_start]);
1223
1224    // Number of columns should match (approximately - be lenient)
1225    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1226        // Only fail if very different
1227        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1228            return None;
1229        }
1230    }
1231
1232    // Find table end (first blank line or end of input)
1233    let mut end_pos = actual_start + 2;
1234    while end_pos < stripped.len() {
1235        let line = stripped[end_pos];
1236        if line.trim().is_empty() {
1237            break;
1238        }
1239        // Row should have pipes
1240        if !line.contains('|') {
1241            break;
1242        }
1243        end_pos += 1;
1244    }
1245
1246    // Must have at least one data row
1247    if end_pos <= actual_start + 2 {
1248        return None;
1249    }
1250
1251    // Check for caption before table (only if we didn't already detect it)
1252    let caption_before =
1253        caption_before.or_else(|| find_caption_before_table(&stripped, actual_start));
1254
1255    // Check for caption after table
1256    let caption_after = if caption_before.is_some() {
1257        None
1258    } else {
1259        find_caption_after_table(&stripped, end_pos)
1260    };
1261
1262    // Build the pipe table
1263    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1264
1265    // Emit caption before if present
1266    if let Some((cap_start, cap_end)) = caption_before {
1267        emit_table_caption(builder, lines, cap_start, cap_end, config);
1268        // Emit blank line between caption and table if present
1269        if cap_end < actual_start {
1270            for line in lines.iter().take(actual_start).skip(cap_end) {
1271                if line.trim().is_empty() {
1272                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1273                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1274                    builder.finish_node();
1275                }
1276            }
1277        }
1278    }
1279
1280    // Emit header row with inline-parsed cells. On the dispatch line the
1281    // core already emitted the container prefix; only when the header is a
1282    // continuation line (e.g. it follows a caption-before line) do we emit
1283    // the prefix here.
1284    emit_pipe_table_row(
1285        builder,
1286        window,
1287        actual_start,
1288        SyntaxKind::TABLE_HEADER,
1289        config,
1290    );
1291
1292    // Emit separator, re-emitting any continuation-line container prefix
1293    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1294    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1295    let sep_idx = actual_start + 1;
1296    let separator_tail = if sep_idx == window.dispatch_pos() {
1297        window.dispatch_tail()
1298    } else {
1299        window.emit_prefix_at(builder, sep_idx)
1300    };
1301    emit_line_tokens(builder, separator_tail);
1302    builder.finish_node();
1303
1304    // Emit data rows with inline-parsed cells (always continuation lines)
1305    for idx in (actual_start + 2)..end_pos {
1306        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1307    }
1308
1309    // Emit caption after if present
1310    if let Some((cap_start, cap_end)) = caption_after {
1311        // Emit blank line before caption if needed
1312        if cap_start > end_pos {
1313            for line in lines.iter().take(cap_start).skip(end_pos) {
1314                if line.trim().is_empty() {
1315                    builder.start_node(SyntaxKind::BLANK_LINE.into());
1316                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
1317                    builder.finish_node();
1318                }
1319            }
1320        }
1321        emit_table_caption(builder, lines, cap_start, cap_end, config);
1322    }
1323
1324    builder.finish_node(); // PipeTable
1325
1326    // Calculate lines consumed
1327    let table_start = caption_before
1328        .map(|(start, _)| start)
1329        .unwrap_or(actual_start);
1330    let table_end = if let Some((_, cap_end)) = caption_after {
1331        cap_end
1332    } else {
1333        end_pos
1334    };
1335
1336    Some(table_end - table_start)
1337}
1338
#[cfg(test)]
mod tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Runs the simple-table parser over `input` with the window positioned
    /// at `pos` (no container prefix) and returns the consumed line count.
    fn simple_table_lines(input: Vec<&str>, pos: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(&input, pos, &prefix);
        try_parse_simple_table(&window, &mut builder, &ParserOptions::default())
    }

    /// Runs the pipe-table parser over `input` with the window positioned at
    /// `pos` (no container prefix) and returns the consumed line count.
    fn pipe_table_lines(input: Vec<&str>, pos: usize) -> Option<usize> {
        let mut builder = GreenNodeBuilder::new();
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(&input, pos, &prefix);
        try_parse_pipe_table(&window, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_separator_detection() {
        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
        // A single dash run is a horizontal rule, not a separator.
        assert!(try_parse_table_separator("-------").is_none());
        // Several dash runs form a table separator.
        assert!(try_parse_table_separator("--- --- ---").is_some());
    }

    #[test]
    fn test_column_extraction() {
        let separator = "-------     ------ ----------   -------";
        assert_eq!(extract_columns(separator, 0).len(), 4);
    }

    #[test]
    fn test_simple_table_with_header() {
        let consumed = simple_table_lines(
            vec![
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ],
            0,
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_headerless_table() {
        let consumed = simple_table_lines(
            vec![
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ],
            0,
        );

        // sep + 2 rows
        assert_eq!(consumed, Some(3));
    }

    #[test]
    fn test_caption_prefix_detection() {
        assert!(try_parse_caption_prefix("Table: My caption").is_some());
        assert!(try_parse_caption_prefix("table: My caption").is_some());
        assert!(try_parse_caption_prefix(": My caption").is_some());
        // Just a colon, no content.
        assert!(try_parse_caption_prefix(":").is_none());
        assert!(try_parse_caption_prefix("Not a caption").is_none());
    }

    #[test]
    fn bare_colon_fenced_code_is_not_table_caption() {
        let input = "Term\n: ```\n  code\n  ```\n";
        let tree = crate::parse(input, None);
        let has_kind = |kind: SyntaxKind| tree.descendants().any(|node| node.kind() == kind);

        assert!(
            has_kind(SyntaxKind::DEFINITION_LIST),
            "should parse as definition list"
        );
        assert!(
            has_kind(SyntaxKind::CODE_BLOCK),
            "definition should preserve fenced code block"
        );
        assert!(
            !has_kind(SyntaxKind::TABLE_CAPTION),
            "fenced code definition should not be parsed as table caption"
        );
    }

    #[test]
    fn bare_colon_caption_after_div_opening_is_table_caption() {
        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
        let tree = crate::parse(input, None);

        let caption_count = tree
            .descendants()
            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
            .count();
        assert_eq!(
            caption_count, 2,
            "expected both captions to attach to tables"
        );

        let has_definition_list = tree
            .descendants()
            .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST);
        assert!(
            !has_definition_list,
            "caption lines in this fenced div table layout should not parse as definition list"
        );
    }

    #[test]
    fn test_table_with_caption_after() {
        let consumed = simple_table_lines(
            vec![
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
                "Table: Demonstration of simple table syntax.",
                "",
            ],
            0,
        );

        // Should consume: header + sep + 2 rows + blank + caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_table_with_caption_before() {
        let consumed = simple_table_lines(
            vec![
                "Table: Demonstration of simple table syntax.",
                "",
                "  Right     Left     Center     Default",
                "-------     ------ ----------   -------",
                "     12     12        12            12",
                "    123     123       123          123",
                "",
            ],
            2,
        );

        // Should consume: caption + blank + header + sep + 2 rows
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_caption_with_colon_prefix() {
        let consumed = simple_table_lines(
            vec![
                "  Right     Left",
                "-------     ------",
                "     12     12",
                "",
                ": Short caption",
                "",
            ],
            0,
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_multiline_caption() {
        let consumed = simple_table_lines(
            vec![
                "  Right     Left",
                "-------     ------",
                "     12     12",
                "",
                "Table: This is a longer caption",
                "that spans multiple lines.",
                "",
            ],
            0,
        );

        // Should consume through end of multi-line caption
        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_simple_table_with_multibyte_cell_content() {
        let consumed = simple_table_lines(
            vec![
                "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
                "--------------  ------------ ------- ---------------- ----------------- ------------",
                "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
                "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
                "",
            ],
            0,
        );

        assert_eq!(consumed, Some(4));
    }

    // Pipe table tests
    #[test]
    fn test_pipe_separator_detection() {
        assert!(try_parse_pipe_separator("|------:|:-----|---------|:------:|").is_some());
        assert!(try_parse_pipe_separator("|---|---|").is_some());
        // No leading pipe
        assert!(try_parse_pipe_separator("-----|-----:").is_some());
        // Orgtbl variant
        assert!(try_parse_pipe_separator("|-----+-------|").is_some());
        assert!(try_parse_pipe_separator("not a separator").is_none());
    }

    #[test]
    fn test_pipe_alignments() {
        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
        assert_eq!(
            aligns,
            [
                Alignment::Right,
                Alignment::Left,
                Alignment::Default,
                Alignment::Center
            ]
        );
    }

    #[test]
    fn test_parse_pipe_table_row() {
        let bordered = parse_pipe_table_row("| Right | Left | Center |");
        assert_eq!(bordered, ["Right", "Left", "Center"]);

        // Without leading/trailing pipes
        let borderless = parse_pipe_table_row("Right | Left | Center");
        assert_eq!(borderless.len(), 3);
    }

    #[test]
    fn test_basic_pipe_table() {
        let consumed = pipe_table_lines(
            vec![
                "",
                "| Right | Left | Center |",
                "|------:|:-----|:------:|",
                "|   12  |  12  |   12   |",
                "|  123  |  123 |  123   |",
                "",
            ],
            1,
        );

        // header + sep + 2 rows
        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_no_edge_pipes() {
        let consumed = pipe_table_lines(
            vec![
                "",
                "fruit| price",
                "-----|-----:",
                "apple|2.05",
                "pear|1.37",
                "",
            ],
            1,
        );

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_pipe_table_with_caption() {
        let consumed = pipe_table_lines(
            vec![
                "",
                "| Col1 | Col2 |",
                "|------|------|",
                "| A    | B    |",
                "",
                "Table: My pipe table",
                "",
            ],
            1,
        );

        // header + sep + row + blank + caption
        assert_eq!(consumed, Some(5));
    }

    #[test]
    fn test_pipe_table_with_multiline_caption_before() {
        let consumed = pipe_table_lines(
            vec![
                ": (#tab:base) base R quoting",
                "functions",
                "",
                "| C | D |",
                "|---|---|",
                "| 3 | 4 |",
                "",
            ],
            0,
        );

        // caption(2) + blank(1) + header + sep + row
        assert_eq!(consumed, Some(6));
    }
}
1670
1671// ============================================================================
1672// Grid Table Parsing
1673// ============================================================================
1674
/// Recognise a grid table border line such as `+---+---+` or `+===+===+`.
///
/// The line must begin with `+` at column 0 and, ignoring trailing
/// whitespace, end with `+`. Every non-empty run between two `+` signs must
/// be a single repeated fill character (`-` or `=`), optionally wrapped in
/// `:` alignment markers; empty runs (`++`) are ignored.
///
/// Returns one [`GridColumn`] per run, or `None` when the line is not a
/// border or contains no runs at all.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    // A grid border must begin at column 0 of its container content. Detection
    // runs on the container-prefix-stripped line (see `try_parse_grid_table`),
    // so any remaining leading whitespace means the border is indented relative
    // to its container -- pandoc parses that as a paragraph, not a grid table.
    if line.trim_start().len() != line.len() {
        return None;
    }

    // Peel off the outer `+ ... +`; what remains is the `+`-separated runs.
    let border = line.trim_end();
    let interior = border.strip_prefix('+')?.strip_suffix('+')?;

    let columns = interior
        .split('+')
        .filter(|run| !run.is_empty())
        .map(|run| {
            // The fill is whatever is left once the alignment colons are gone.
            let mut fill = run.trim_matches(':').chars();
            let fill_char = fill.next().filter(|c| matches!(*c, '-' | '='))?;
            fill.all(|c| c == fill_char).then(|| GridColumn {
                is_header_separator: fill_char == '=',
                // The width counts the alignment colons as well as the fill.
                width: run.chars().count(),
            })
        })
        .collect::<Option<Vec<_>>>()?;

    (!columns.is_empty()).then_some(columns)
}

/// One column of a grid table, as described by a single border run.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the run is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the run, alignment colons included.
    width: usize,
}
1752
1753fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1754    let mut end_byte = start_byte;
1755    let mut display_cols = 0usize;
1756
1757    for (offset, ch) in line[start_byte..].char_indices() {
1758        if ch == '|' {
1759            let sep_byte = start_byte + offset;
1760            return (sep_byte, sep_byte + 1);
1761        }
1762        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1763        if display_cols + ch_width > width {
1764            break;
1765        }
1766        display_cols += ch_width;
1767        end_byte = start_byte + offset + ch.len_utf8();
1768        if display_cols >= width {
1769            break;
1770        }
1771    }
1772
1773    // If the width budget is exhausted before seeing a separator (for example
1774    // because of padding/layout drift), advance to the next literal separator
1775    // to keep row slicing aligned and preserve losslessness.
1776    let mut sep_byte = end_byte;
1777    while sep_byte < line.len() {
1778        let mut chars = line[sep_byte..].chars();
1779        let Some(ch) = chars.next() else {
1780            break;
1781        };
1782        if ch == '|' {
1783            return (sep_byte, sep_byte + 1);
1784        }
1785        sep_byte += ch.len_utf8();
1786    }
1787
1788    (end_byte, end_byte)
1789}
1790
/// Decide whether `line` looks like a grid table content row.
///
/// A content row has at most three bytes of leading whitespace, starts with
/// `|`, and (ignoring trailing whitespace) ends with either `|` for a normal
/// row or `+` for a spanning-style continuation line.
fn is_grid_content_row(line: &str) -> bool {
    let indent = line.len() - line.trim_start().len();
    let body = line.trim();

    indent <= 3
        && body.starts_with('|')
        && matches!(body.chars().next_back(), Some('|' | '+'))
}
1804
1805/// Extract cell contents from a single grid table row line.
1806/// Returns a vector of cell contents (trimmed) based on column boundaries.
1807/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1808fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1809    let (line_content, _) = strip_newline(line);
1810    let line_trimmed = line_content.trim();
1811
1812    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1813        return vec![String::new(); _columns.len()];
1814    }
1815
1816    let mut cells = Vec::with_capacity(_columns.len());
1817    let mut pos_byte = 1; // Skip leading pipe
1818
1819    for col in _columns {
1820        let col_idx = cells.len();
1821        if pos_byte >= line_trimmed.len() {
1822            cells.push(String::new());
1823            continue;
1824        }
1825
1826        let start_byte = pos_byte;
1827        let end_byte = if col_idx + 1 == _columns.len() {
1828            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1829        } else {
1830            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1831            pos_byte = next_start;
1832            end
1833        };
1834        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1835        if col_idx + 1 == _columns.len() {
1836            pos_byte = line_trimmed.len();
1837        }
1838    }
1839
1840    cells
1841}
1842
1843/// Emit a grid table row with inline-parsed cells.
1844/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1845/// then continuation lines as raw TEXT for losslessness.
1846fn emit_grid_table_row(
1847    builder: &mut GreenNodeBuilder<'static>,
1848    window: &StrippedLines<'_, '_>,
1849    indices: &[usize],
1850    columns: &[GridColumn],
1851    row_kind: SyntaxKind,
1852    config: &ParserOptions,
1853) {
1854    if indices.is_empty() {
1855        return;
1856    }
1857
1858    builder.start_node(row_kind.into());
1859
1860    // Emit first line with TABLE_CELL nodes. The continuation-line container
1861    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1862    // inside the row node before the cell text; the returned tail is the
1863    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
1864    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1865    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
1866    let cell_contents = extract_grid_cells_from_line(first_line, columns);
1867    let (line_without_newline, newline_str) = strip_newline(first_line);
1868    let trimmed = line_without_newline.trim();
1869    let expected_pipe_count = columns.len().saturating_add(1);
1870    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1871
1872    // Rows that don't contain all expected column separators (spanning-style rows)
1873    // must be emitted verbatim for losslessness. The first line's prefix was
1874    // already consumed above; emit its tail and each continuation tail.
1875    if actual_pipe_count != expected_pipe_count {
1876        emit_line_tokens(builder, first_line);
1877        for &idx in &indices[1..] {
1878            let tail = window.emit_or_dispatch_tail(builder, idx);
1879            emit_line_tokens(builder, tail);
1880        }
1881        builder.finish_node();
1882        return;
1883    }
1884
1885    // Emit leading whitespace
1886    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1887    if leading_ws_len > 0 {
1888        builder.token(
1889            SyntaxKind::WHITESPACE.into(),
1890            &line_without_newline[..leading_ws_len],
1891        );
1892    }
1893
1894    // Emit leading pipe
1895    if trimmed.starts_with('|') {
1896        builder.token(SyntaxKind::TEXT.into(), "|");
1897    }
1898
1899    // Emit each cell based on fixed column widths from separators
1900    let mut pos_byte = 1usize; // after leading pipe
1901    for (idx, cell_content) in cell_contents.iter().enumerate() {
1902        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
1903            let start_byte = pos_byte;
1904            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
1905                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1906            } else {
1907                let (end, next_start) =
1908                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
1909                pos_byte = next_start;
1910                end
1911            };
1912            let slice = &trimmed[start_byte..end_byte];
1913            if idx + 1 == columns.len() {
1914                pos_byte = trimmed.len();
1915            }
1916            slice
1917        } else {
1918            ""
1919        };
1920
1921        // Emit leading whitespace in cell
1922        let cell_trimmed = part.trim();
1923        let ws_start_len = part.len() - part.trim_start().len();
1924        if ws_start_len > 0 {
1925            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
1926        }
1927
1928        // Emit TABLE_CELL with inline parsing
1929        emit_table_cell(builder, cell_content, config);
1930
1931        // Emit trailing whitespace in cell
1932        let ws_end_start = ws_start_len + cell_trimmed.len();
1933        if ws_end_start < part.len() {
1934            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
1935        }
1936
1937        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
1938        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
1939            builder.token(SyntaxKind::TEXT.into(), "|");
1940        }
1941    }
1942
1943    // Emit trailing whitespace before newline
1944    let trailing_ws_start = leading_ws_len + trimmed.len();
1945    if trailing_ws_start < line_without_newline.len() {
1946        builder.token(
1947            SyntaxKind::WHITESPACE.into(),
1948            &line_without_newline[trailing_ws_start..],
1949        );
1950    }
1951
1952    // Emit newline
1953    if !newline_str.is_empty() {
1954        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1955    }
1956
1957    // Emit continuation lines as TEXT for losslessness, re-emitting each
1958    // line's container prefix first.
1959    for &idx in &indices[1..] {
1960        let tail = window.emit_or_dispatch_tail(builder, idx);
1961        emit_line_tokens(builder, tail);
1962    }
1963
1964    builder.finish_node();
1965}
1966
1967/// Try to parse a grid table starting at the given position.
1968/// Returns the number of lines consumed if successful.
1969pub(crate) fn try_parse_grid_table(
1970    window: &StrippedLines<'_, '_>,
1971    builder: &mut GreenNodeBuilder<'static>,
1972    config: &ParserOptions,
1973) -> Option<usize> {
1974    let lines = window.raw();
1975    let start_pos = window.pos();
1976    if start_pos >= lines.len() {
1977        return None;
1978    }
1979
1980    // Detection scans run against the container-prefix-stripped view so a
1981    // grid table nested in `list → blockquote` (e.g. `- > +---+---+`) has its
1982    // `  > ` prefix removed before the separator/content-row shape checks.
1983    // With an empty prefix `stripped == lines`. Emission re-emits the prefix
1984    // bytes as tokens via the window; captions/blank lines read raw `lines`.
1985    //
1986    // Cheap gate before the O(buffer) `strip_all` below: a grid table's first
1987    // line is a grid separator (`+---+`/`+===+`), unless this is a caption-led
1988    // table. Table detection runs at every block start, so stripping the whole
1989    // line buffer for every prose/math paragraph was quadratic on large
1990    // documents. When there is no container prefix (the common case) the
1991    // dispatch line's bytes are identical to the stripped view this function
1992    // builds below, so this peek is equivalent to the `try_parse_grid_separator`
1993    // check; with a non-empty prefix we skip the gate and fall through.
1994    if window.prefix().ops().is_empty()
1995        && try_parse_grid_separator(window.strip_at(start_pos)).is_none()
1996        && !is_caption_followed_by_table(lines, start_pos)
1997    {
1998        return None;
1999    }
2000
2001    // `strip_all` keeps the dispatch line's list-indent (via
2002    // `strip_line_0_for_emission`) so emission re-injects those bytes
2003    // correctly. For grid-border *detection*, the strict column-0 check in
2004    // `try_parse_grid_separator` would then reject a border that's actually
2005    // sitting at column 0 of the list-item's inner content — so force the
2006    // dispatch line to its fully-stripped view here. Emission still goes
2007    // through `window.emit_or_dispatch_tail`, which preserves the indent
2008    // bytes.
2009    let mut stripped = window.strip_all();
2010    let dispatch_pos = window.dispatch_pos();
2011    if dispatch_pos < stripped.len() {
2012        stripped[dispatch_pos] = window.prefix().strip(lines[dispatch_pos]);
2013    }
2014
2015    // Check if this line is a caption followed by a table
2016    // If so, the actual table starts after the caption and blank line
2017    let (actual_start, caption_before) = if is_caption_followed_by_table(&stripped, start_pos) {
2018        let (cap_start, cap_end) = caption_range_starting_at(&stripped, start_pos)?;
2019        let mut pos = cap_end;
2020        while pos < stripped.len() && stripped[pos].trim().is_empty() {
2021            pos += 1;
2022        }
2023        (pos, Some((cap_start, cap_end)))
2024    } else {
2025        (start_pos, None)
2026    };
2027
2028    if actual_start >= lines.len() {
2029        return None;
2030    }
2031
2032    // First line must be a grid separator
2033    let first_line = stripped[actual_start];
2034    let _columns = try_parse_grid_separator(first_line)?;
2035
2036    // Track table structure
2037    let mut end_pos = actual_start + 1;
2038    let mut found_header_sep = false;
2039    let mut in_footer = false;
2040
2041    // Scan table lines
2042    while end_pos < lines.len() {
2043        let line = stripped[end_pos];
2044
2045        // Check for blank line (table ends)
2046        if line.trim().is_empty() {
2047            break;
2048        }
2049
2050        // Check for separator line
2051        if let Some(sep_cols) = try_parse_grid_separator(line) {
2052            // Check if this is a header separator (=)
2053            if sep_cols.iter().any(|c| c.is_header_separator) {
2054                if !found_header_sep {
2055                    found_header_sep = true;
2056                } else if !in_footer {
2057                    // Second = separator starts footer
2058                    in_footer = true;
2059                }
2060            }
2061            end_pos += 1;
2062            continue;
2063        }
2064
2065        // Check for content row
2066        if is_grid_content_row(line) {
2067            end_pos += 1;
2068            continue;
2069        }
2070
2071        // Not a valid grid table line - table ends
2072        break;
2073    }
2074
2075    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2076    // Or just top + content rows that end with a separator
2077    if end_pos <= actual_start + 1 {
2078        return None;
2079    }
2080
2081    // Last consumed line should be a separator for a well-formed table
2082    // But we'll be lenient and accept tables ending with content rows
2083
2084    // Check for caption before table (only if we didn't already detected it)
2085    let caption_before =
2086        caption_before.or_else(|| find_caption_before_table(&stripped, actual_start));
2087
2088    // Check for caption after table
2089    let caption_after = if caption_before.is_some() {
2090        None
2091    } else {
2092        find_caption_after_table(&stripped, end_pos)
2093    };
2094
2095    // Build the grid table
2096    builder.start_node(SyntaxKind::GRID_TABLE.into());
2097
2098    // Emit caption before if present
2099    if let Some((cap_start, cap_end)) = caption_before {
2100        emit_table_caption(builder, lines, cap_start, cap_end, config);
2101        // Emit blank line between caption and table if present
2102        if cap_end < actual_start {
2103            for line in lines.iter().take(actual_start).skip(cap_end) {
2104                if line.trim().is_empty() {
2105                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2106                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2107                    builder.finish_node();
2108                }
2109            }
2110        }
2111    }
2112
2113    // Track whether we've passed the header separator
2114    let mut past_header_sep = false;
2115    let mut in_footer_section = false;
2116    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2117    // each line's container prefix can be re-emitted via the window.
2118    let mut current_row_indices: Vec<usize> = Vec::new();
2119    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2120
2121    // Emit table rows - accumulate multi-line cells
2122    for (idx, &line) in stripped.iter().enumerate().take(end_pos).skip(actual_start) {
2123        if let Some(sep_cols) = try_parse_grid_separator(line) {
2124            // Separator line - emit any accumulated row first
2125            if !current_row_indices.is_empty() {
2126                emit_grid_table_row(
2127                    builder,
2128                    window,
2129                    &current_row_indices,
2130                    &sep_cols,
2131                    current_row_kind,
2132                    config,
2133                );
2134                current_row_indices.clear();
2135            }
2136
2137            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2138
2139            // Re-emit any continuation-line container prefix (`  > `) as
2140            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2141            if is_header_sep {
2142                if !past_header_sep {
2143                    // This is the header/body separator
2144                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2145                    let tail = window.emit_or_dispatch_tail(builder, idx);
2146                    emit_line_tokens(builder, tail);
2147                    builder.finish_node();
2148                    past_header_sep = true;
2149                } else {
2150                    // Footer separator
2151                    if !in_footer_section {
2152                        in_footer_section = true;
2153                    }
2154                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2155                    let tail = window.emit_or_dispatch_tail(builder, idx);
2156                    emit_line_tokens(builder, tail);
2157                    builder.finish_node();
2158                }
2159            } else {
2160                // Regular separator (row boundary)
2161                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2162                let tail = window.emit_or_dispatch_tail(builder, idx);
2163                emit_line_tokens(builder, tail);
2164                builder.finish_node();
2165            }
2166        } else if is_grid_content_row(line) {
2167            // Content row - accumulate for multi-line cells
2168            current_row_kind = if !past_header_sep && found_header_sep {
2169                SyntaxKind::TABLE_HEADER
2170            } else if in_footer_section {
2171                SyntaxKind::TABLE_FOOTER
2172            } else {
2173                SyntaxKind::TABLE_ROW
2174            };
2175
2176            current_row_indices.push(idx);
2177        }
2178    }
2179
2180    // Emit any remaining accumulated row
2181    if !current_row_indices.is_empty() {
2182        // Use first separator's columns for cell boundaries
2183        if let Some(sep_cols) = try_parse_grid_separator(stripped[actual_start]) {
2184            emit_grid_table_row(
2185                builder,
2186                window,
2187                &current_row_indices,
2188                &sep_cols,
2189                current_row_kind,
2190                config,
2191            );
2192        }
2193    }
2194
2195    // Emit caption after if present
2196    if let Some((cap_start, cap_end)) = caption_after {
2197        if cap_start > end_pos {
2198            for line in lines.iter().take(cap_start).skip(end_pos) {
2199                if line.trim().is_empty() {
2200                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2201                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2202                    builder.finish_node();
2203                }
2204            }
2205        }
2206        emit_table_caption(builder, lines, cap_start, cap_end, config);
2207    }
2208
2209    builder.finish_node(); // GRID_TABLE
2210
2211    // Calculate lines consumed
2212    let table_start = caption_before
2213        .map(|(start, _)| start)
2214        .unwrap_or(actual_start);
2215    let table_end = if let Some((_, cap_end)) = caption_after {
2216        cap_end
2217    } else {
2218        end_pos
2219    };
2220
2221    Some(table_end - table_start)
2222}
2223
#[cfg(test)]
mod grid_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Run the grid table parser on `input` with the window positioned at
    /// line `pos` (no container prefix) and return the consumed line count.
    fn consumed_lines(input: &[&str], pos: usize) -> Option<usize> {
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(input, pos, &prefix);
        let mut builder = GreenNodeBuilder::new();
        try_parse_grid_table(&window, &mut builder, &ParserOptions::default())
    }

    #[test]
    fn test_grid_separator_detection() {
        let borders = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for border in borders {
            assert!(
                try_parse_grid_separator(border).is_some(),
                "expected a grid border: {border:?}"
            );
        }

        // A grid border must sit at column 0 of its container content; an
        // indented border is not a grid table (matches pandoc, which parses
        // an indented `+---+` as a paragraph). Detection runs on the
        // container-stripped line, so any remaining leading space disqualifies.
        let non_borders = [
            "not a separator",
            "|---|---|", // pipe table separator
            " +---+---+",
            "  +---+---+",
            "   +===+===+",
        ];
        for line in non_borders {
            assert!(
                try_parse_grid_separator(line).is_none(),
                "expected no grid border: {line:?}"
            );
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|c| c.is_header_separator));

        let row_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(row_cols.iter().all(|c| !c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        assert!(is_grid_content_row("| content | content |"));
        assert!(is_grid_content_row("|  |  |"));
        assert!(is_grid_content_row("| content +------+"));
        assert!(!is_grid_content_row("+---+---+")); // separator, not content
        assert!(!is_grid_content_row("no pipes here"));
    }

    #[test]
    fn test_basic_grid_table() {
        let input = [
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = [
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = [
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(&input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = [
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Parsing starts on the top border (line 2); the count still
        // includes caption + blank + table.
        assert_eq!(consumed_lines(&input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = [
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(&input, 0), Some(7));
    }
}
2399
2400// ============================================================================
2401// Multiline Table Parsing
2402// ============================================================================
2403
2404/// Check if a line is a multiline table separator (continuous dashes).
2405/// Multiline table separators span the full width and are all dashes.
2406/// Returns Some(columns) if valid, None otherwise.
2407fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2408    let trimmed = line.trim_start();
2409    let leading_spaces = line.len() - trimmed.len();
2410
2411    // Must have leading spaces <= 3 to not be a code block
2412    if leading_spaces > 3 {
2413        return None;
2414    }
2415
2416    let trimmed = trimmed.trim_end();
2417
2418    // Must be all dashes (continuous line of dashes)
2419    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2420        return None;
2421    }
2422
2423    // Must have at least 3 dashes
2424    if trimmed.len() < 3 {
2425        return None;
2426    }
2427
2428    // This is a full-width separator - columns will be determined by column separator lines
2429    Some(vec![Column {
2430        start: leading_spaces,
2431        end: leading_spaces + trimmed.len(),
2432        alignment: Alignment::Default,
2433    }])
2434}
2435
2436/// Check if a line is a column separator line for multiline tables.
2437/// Column separators have dashes with spaces between them to define columns.
2438fn is_column_separator(line: &str) -> bool {
2439    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2440}
2441
2442fn is_headerless_single_row_without_blank(
2443    lines: &[&str],
2444    row_start: usize,
2445    row_end: usize,
2446    columns: &[Column],
2447) -> bool {
2448    if row_start >= row_end {
2449        return false;
2450    }
2451
2452    if row_end - row_start == 1 {
2453        return false;
2454    }
2455
2456    let Some(last_col) = columns.last() else {
2457        return false;
2458    };
2459
2460    for line in lines.iter().take(row_end).skip(row_start + 1) {
2461        let (content, _) = strip_newline(line);
2462        let prefix_end = last_col.start.min(content.len());
2463        if !content[..prefix_end].trim().is_empty() {
2464            return false;
2465        }
2466    }
2467
2468    true
2469}
2470
2471/// Try to parse a multiline table starting at the given position.
2472/// Returns the number of lines consumed if successful.
2473pub(crate) fn try_parse_multiline_table(
2474    window: &StrippedLines<'_, '_>,
2475    builder: &mut GreenNodeBuilder<'static>,
2476    config: &ParserOptions,
2477) -> Option<usize> {
2478    let lines = window.raw();
2479    let start_pos = window.pos();
2480    if start_pos >= lines.len() {
2481        return None;
2482    }
2483
2484    // Cheap gate before the O(buffer) `strip_all` below: a multiline table's
2485    // first line is either a full-width dash separator or a column separator.
2486    // Table detection runs at every block start, so stripping the whole line
2487    // buffer for every paragraph that can't begin a multiline table was
2488    // quadratic on large documents. Peek just the dispatch line via `strip_at`
2489    // and bail before materializing the full view.
2490    let first_line = window.strip_at(start_pos);
2491
2492    // First line can be either:
2493    // 1. A full-width dash separator (for tables with headers)
2494    // 2. A column separator (for headerless tables)
2495    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2496    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2497    if !is_full_width_start && !is_column_sep_start {
2498        return None;
2499    }
2500
2501    // Detection scans run against the container-prefix-stripped view so a
2502    // multiline table nested in `list → blockquote` (e.g. `- > ----`) has its
2503    // `  > ` prefix removed before the separator/blank-row shape checks. The
2504    // interior `>`-only row then strips to `""` and registers as a blank row
2505    // separator. With an empty prefix `stripped == lines`. Emission re-emits
2506    // the prefix bytes as tokens via the window; captions read raw `lines`.
2507    let stripped = window.strip_all();
2508
2509    let headerless_columns = if is_column_sep_start {
2510        try_parse_table_separator(stripped[start_pos])
2511    } else {
2512        None
2513    };
2514
2515    // Look ahead to find the structure
2516    let mut pos = start_pos + 1;
2517    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2518    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2519    let mut has_header = false;
2520    let mut found_blank_line = false;
2521    let mut found_closing_sep = false;
2522    let mut content_line_count = 0usize;
2523
2524    // Scan for header section and column separator
2525    while pos < lines.len() {
2526        let line = stripped[pos];
2527
2528        // Check for column separator (defines columns) - only if we started with full-width
2529        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2530            found_column_sep = true;
2531            column_sep_pos = pos;
2532            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2533            pos += 1;
2534            continue;
2535        }
2536
2537        // Check for blank line (row separator in body)
2538        if line.trim().is_empty() {
2539            found_blank_line = true;
2540            pos += 1;
2541            // Check if next line is a valid closing separator for this table shape.
2542            if pos < lines.len() {
2543                let next = stripped[pos];
2544                let is_valid_closer = if is_full_width_start {
2545                    try_parse_multiline_separator(next).is_some()
2546                } else {
2547                    is_column_separator(next)
2548                };
2549                if is_valid_closer {
2550                    found_closing_sep = true;
2551                    pos += 1; // Include the closing separator
2552                    break;
2553                }
2554            }
2555            continue;
2556        }
2557
2558        // Check for closing full-width dashes (only for full-width-start tables).
2559        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2560            found_closing_sep = true;
2561            pos += 1;
2562            break;
2563        }
2564
2565        // Check for closing column separator (for headerless tables)
2566        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2567            found_closing_sep = true;
2568            pos += 1;
2569            break;
2570        }
2571
2572        // Content row
2573        content_line_count += 1;
2574        pos += 1;
2575    }
2576
2577    // Must have found a column separator to be a valid multiline table
2578    if !found_column_sep {
2579        return None;
2580    }
2581
2582    // Must have had at least one blank line between rows (distinguishes from simple tables)
2583    if !found_blank_line {
2584        if !is_column_sep_start {
2585            return None;
2586        }
2587        let columns = headerless_columns.as_deref()?;
2588        if !is_headerless_single_row_without_blank(&stripped, start_pos + 1, pos - 1, columns) {
2589            return None;
2590        }
2591    }
2592
2593    // Must have a closing separator
2594    if !found_closing_sep {
2595        return None;
2596    }
2597
2598    // Must have consumed more than just the opening separator
2599    if pos <= start_pos + 2 {
2600        return None;
2601    }
2602
2603    let end_pos = pos;
2604
2605    // Extract column boundaries from the separator line
2606    let columns = try_parse_table_separator(stripped[column_sep_pos])
2607        .expect("Column separator must be valid");
2608
2609    // Check for caption before table
2610    let caption_before = find_caption_before_table(&stripped, start_pos);
2611
2612    // Check for caption after table
2613    let caption_after = if caption_before.is_some() {
2614        None
2615    } else {
2616        find_caption_after_table(&stripped, end_pos)
2617    };
2618
2619    // Build the multiline table
2620    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2621
2622    // Emit caption before if present
2623    if let Some((cap_start, cap_end)) = caption_before {
2624        emit_table_caption(builder, lines, cap_start, cap_end, config);
2625
2626        // Emit blank line between caption and table if present
2627        if cap_end < start_pos {
2628            for line in lines.iter().take(start_pos).skip(cap_end) {
2629                if line.trim().is_empty() {
2630                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2631                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2632                    builder.finish_node();
2633                }
2634            }
2635        }
2636    }
2637
2638    // Emit opening separator. The dispatch line's prefix was already consumed
2639    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
2640    // re-emits its `  > ` prefix via `emit_prefix_at`.
2641    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2642    let tail = window.emit_or_dispatch_tail(builder, start_pos);
2643    emit_line_tokens(builder, tail);
2644    builder.finish_node();
2645
2646    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
2647    // up a multi-line row so each line's container prefix can be re-emitted via
2648    // the window.
2649    let mut in_header = has_header;
2650    let mut current_row_indices: Vec<usize> = Vec::new();
2651
2652    for (i, &line) in stripped
2653        .iter()
2654        .enumerate()
2655        .take(end_pos)
2656        .skip(start_pos + 1)
2657    {
2658        // Column separator (header/body divider)
2659        if i == column_sep_pos {
2660            // Emit any accumulated header lines
2661            if !current_row_indices.is_empty() {
2662                emit_multiline_table_row(
2663                    builder,
2664                    window,
2665                    &current_row_indices,
2666                    &columns,
2667                    SyntaxKind::TABLE_HEADER,
2668                    config,
2669                );
2670                current_row_indices.clear();
2671            }
2672
2673            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2674            let tail = window.emit_or_dispatch_tail(builder, i);
2675            emit_line_tokens(builder, tail);
2676            builder.finish_node();
2677            in_header = false;
2678            continue;
2679        }
2680
2681        // Closing separator (full-width or column separator at end)
2682        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2683            // Emit any accumulated row lines
2684            if !current_row_indices.is_empty() {
2685                let kind = if in_header {
2686                    SyntaxKind::TABLE_HEADER
2687                } else {
2688                    SyntaxKind::TABLE_ROW
2689                };
2690                emit_multiline_table_row(
2691                    builder,
2692                    window,
2693                    &current_row_indices,
2694                    &columns,
2695                    kind,
2696                    config,
2697                );
2698                current_row_indices.clear();
2699            }
2700
2701            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2702            let tail = window.emit_or_dispatch_tail(builder, i);
2703            emit_line_tokens(builder, tail);
2704            builder.finish_node();
2705            continue;
2706        }
2707
2708        // Blank line (row separator)
2709        if line.trim().is_empty() {
2710            // Emit accumulated row
2711            if !current_row_indices.is_empty() {
2712                let kind = if in_header {
2713                    SyntaxKind::TABLE_HEADER
2714                } else {
2715                    SyntaxKind::TABLE_ROW
2716                };
2717                emit_multiline_table_row(
2718                    builder,
2719                    window,
2720                    &current_row_indices,
2721                    &columns,
2722                    kind,
2723                    config,
2724                );
2725                current_row_indices.clear();
2726            }
2727
2728            // Re-emit the interior `>`-only separator row's container prefix
2729            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
2730            builder.start_node(SyntaxKind::BLANK_LINE.into());
2731            let tail = window.emit_or_dispatch_tail(builder, i);
2732            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
2733            builder.finish_node();
2734            continue;
2735        }
2736
2737        // Content line - accumulate for current row
2738        current_row_indices.push(i);
2739    }
2740
2741    // Emit any remaining accumulated lines
2742    if !current_row_indices.is_empty() {
2743        let kind = if in_header {
2744            SyntaxKind::TABLE_HEADER
2745        } else {
2746            SyntaxKind::TABLE_ROW
2747        };
2748        emit_multiline_table_row(
2749            builder,
2750            window,
2751            &current_row_indices,
2752            &columns,
2753            kind,
2754            config,
2755        );
2756    }
2757
2758    // Emit caption after if present
2759    if let Some((cap_start, cap_end)) = caption_after {
2760        if cap_start > end_pos {
2761            for line in lines.iter().take(cap_start).skip(end_pos) {
2762                if line.trim().is_empty() {
2763                    builder.start_node(SyntaxKind::BLANK_LINE.into());
2764                    builder.token(SyntaxKind::BLANK_LINE.into(), line);
2765                    builder.finish_node();
2766                }
2767            }
2768        }
2769        emit_table_caption(builder, lines, cap_start, cap_end, config);
2770    }
2771
2772    builder.finish_node(); // MultilineTable
2773
2774    // Calculate lines consumed
2775    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2776    let table_end = if let Some((_, cap_end)) = caption_after {
2777        cap_end
2778    } else {
2779        end_pos
2780    };
2781
2782    Some(table_end - table_start)
2783}
2784
2785/// Extract cell contents from first line only (for CST emission).
2786/// Multi-line content will be in continuation TEXT tokens.
2787fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2788    let (line_content, _) = strip_newline(line);
2789    let mut cells = Vec::new();
2790
2791    for column in columns.iter() {
2792        let column_start = column_offset_to_byte_index(line_content, column.start);
2793        let column_end = column_offset_to_byte_index(line_content, column.end);
2794
2795        // Extract FULL text for this column (including whitespace)
2796        let cell_text = if column_start < column_end {
2797            &line_content[column_start..column_end]
2798        } else if column_start < line_content.len() {
2799            &line_content[column_start..]
2800        } else {
2801            ""
2802        };
2803
2804        cells.push(cell_text.to_string());
2805    }
2806
2807    cells
2808}
2809
2810/// Emit a multiline table row with inline parsing (Phase 7.1).
2811///
2812/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2813/// physical line re-emits its container prefix (`  > `) via the window before
2814/// its content. With an empty prefix the tails equal the raw lines, so emission
2815/// is byte-identical to the pre-window path.
2816fn emit_multiline_table_row(
2817    builder: &mut GreenNodeBuilder<'static>,
2818    window: &StrippedLines<'_, '_>,
2819    indices: &[usize],
2820    columns: &[Column],
2821    kind: SyntaxKind,
2822    config: &ParserOptions,
2823) {
2824    if indices.is_empty() {
2825        return;
2826    }
2827
2828    builder.start_node(kind.into());
2829
2830    // Emit the first line's container prefix as tokens, then slice cells from
2831    // the prefix-stripped tail (for CST losslessness, only the first physical
2832    // line is parsed into cells; continuation lines stay verbatim TEXT).
2833    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2834    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2835    let (trimmed, newline_str) = strip_newline(first_line);
2836    let mut current_pos = 0;
2837
2838    for (col_idx, column) in columns.iter().enumerate() {
2839        let cell_text = &cell_contents[col_idx];
2840        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2841        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2842
2843        // Emit whitespace before cell
2844        if current_pos < cell_start {
2845            builder.token(
2846                SyntaxKind::WHITESPACE.into(),
2847                &trimmed[current_pos..cell_start],
2848            );
2849        }
2850
2851        // Emit cell with inline parsing (first line content only)
2852        emit_table_cell(builder, cell_text, config);
2853
2854        current_pos = cell_end;
2855    }
2856
2857    // Emit trailing whitespace
2858    if current_pos < trimmed.len() {
2859        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2860    }
2861
2862    // Emit newline
2863    if !newline_str.is_empty() {
2864        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2865    }
2866
2867    // Emit continuation lines as TEXT to preserve exact line structure,
2868    // re-emitting each line's container prefix first.
2869    for &idx in &indices[1..] {
2870        let tail = window.emit_or_dispatch_tail(builder, idx);
2871        emit_line_tokens(builder, tail);
2872    }
2873
2874    builder.finish_node();
2875}
2876
#[cfg(test)]
mod multiline_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Run the multiline table parser on `input`, starting at line 0 with an
    /// empty container prefix, and return the number of lines it consumed.
    fn parse_from_first_line(input: &[&str]) -> Option<usize> {
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(input, 0, &prefix);
        let mut builder = GreenNodeBuilder::new();
        try_parse_multiline_table(&window, &mut builder, &ParserOptions::default())
    }

    /// Emit `text` as a single table cell and return the resulting root node.
    fn cell_node(text: &str) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        emit_table_cell(&mut builder, text, &ParserOptions::default());
        SyntaxNode::new_root(builder.finish())
    }

    #[test]
    fn test_multiline_separator_detection() {
        // Accepted: unbroken runs of three or more dashes, optionally indented.
        assert!(
            try_parse_multiline_separator(
                "-------------------------------------------------------------"
            )
            .is_some()
        );
        assert!(try_parse_multiline_separator("---").is_some());
        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces

        // Rejected shapes.
        assert!(try_parse_multiline_separator("--").is_none()); // too short
        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
    }

    #[test]
    fn test_basic_multiline_table() {
        let consumed = parse_from_first_line(&[
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ]);

        // Everything up to and including the closing separator.
        assert_eq!(consumed, Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        let consumed = parse_from_first_line(&[
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        // One single-line row between column separators and no blank line
        // inside the table: not a multiline table.
        let consumed = parse_from_first_line(&[
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ]);

        assert_eq!(consumed, None);
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        // A single row whose last cell continues on a second line is accepted
        // even though the table contains no blank line.
        let consumed = parse_from_first_line(&[
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let consumed = parse_from_first_line(&[
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ]);

        // table (6 lines) + blank + caption
        assert_eq!(consumed, Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let consumed = parse_from_first_line(&[
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        // Ordinary document content containing horizontal rules must not be
        // picked up as a table.
        let consumed = parse_from_first_line(&[
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ]);

        assert_eq!(consumed, None);
    }

    #[test]
    fn test_not_multiline_table() {
        // A simple table must not be parsed as multiline: its first line is
        // header text rather than a separator line.
        let consumed = parse_from_first_line(&[
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ]);

        assert_eq!(consumed, None);
    }

    // Phase 7.1: Unit tests for emit_table_cell() helper
    #[test]
    fn test_emit_table_cell_plain_text() {
        let node = cell_node("Cell");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "Cell");

        // Exactly one child, a TEXT token.
        let children: Vec<_> = node.children_with_tokens().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        let node = cell_node("*italic*");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "*italic*");

        // Exactly one child node: EMPHASIS.
        let children: Vec<_> = node.children().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        let node = cell_node("`code`");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "`code`");

        // Exactly one child node: INLINE_CODE.
        let children: Vec<_> = node.children().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        let node = cell_node("[text](url)");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "[text](url)");

        // Exactly one child node: LINK.
        let children: Vec<_> = node.children().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), SyntaxKind::LINK);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        let node = cell_node("**bold**");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "**bold**");

        // Exactly one child node: STRONG.
        let children: Vec<_> = node.children().collect();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let node = cell_node("Text **bold** and `code`");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "Text **bold** and `code`");

        // At least four children are expected: text, strong, text, inline code.
        let children: Vec<_> = node.children_with_tokens().collect();
        assert!(children.len() >= 4);

        // Spot-check the leading elements.
        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let node = cell_node("");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(node.text(), "");

        // An empty cell has no children at all.
        let children: Vec<_> = node.children_with_tokens().collect();
        assert_eq!(children.len(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let node = cell_node(r"A \| B");

        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
        // The escaped pipe must be preserved verbatim.
        assert_eq!(node.text(), r"A \| B");
    }
}