Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Read-only indexed view over lines for table detection scans. Three
/// backings are implemented in this file:
///
/// - `[&str]` — a raw, unstripped line buffer, used by callers that scan
///   the source directly (the block dispatcher's caption lookahead, list
///   and definition-list probes).
/// - [`StrippedLines`] — a container-prefix-stripped view that strips each
///   line lazily on access via [`StrippedLines::strip_at`]. Detection scans
///   touch only a bounded range (they stop at the first blank line), so this
///   stays O(scanned lines) rather than materializing the whole buffer. The
///   old `strip_all` collected `0..raw.len()` on every call, which was
///   quadratic when table detection runs at every block start inside a
///   large blockquote or list.
/// - [`UniformStripView`] — a wrapper over [`StrippedLines`] that applies the
///   full container-prefix strip to every line on access, including the
///   dispatch line.
pub(crate) trait LineView {
    /// The line at absolute index `i`.
    ///
    /// Callers are expected to keep `i < line_count()`: the `[&str]` backing
    /// indexes the slice directly and panics on an out-of-range index.
    fn line(&self, i: usize) -> &str;
    /// Total number of lines (absolute upper bound for indices).
    fn line_count(&self) -> usize;
}
36
/// Raw backing: lines are returned exactly as stored, with no prefix
/// stripping.
impl LineView for [&str] {
    // Direct slice indexing: panics if `i` is out of bounds.
    fn line(&self, i: usize) -> &str {
        self[i]
    }
    fn line_count(&self) -> usize {
        self.len()
    }
}
45
/// Stripped backing: each access strips the requested line on demand through
/// [`StrippedLines::strip_at`]; indices are positions in the raw buffer.
impl<'a, 'p> LineView for StrippedLines<'a, 'p> {
    fn line(&self, i: usize) -> &str {
        self.strip_at(i)
    }
    // The count is that of the underlying raw buffer, not of a sub-window.
    fn line_count(&self) -> usize {
        self.raw().len()
    }
}
54
/// A [`LineView`] over a [`StrippedLines`] window that strips *every* line —
/// including the dispatch line — with the full container strip rather than
/// the emission-safe line-0 strip. Grid-border detection needs this: a
/// `+---+` border sitting at column 0 of a list item's inner content must
/// not retain the list indent, or the strict column-0 check in
/// `try_parse_grid_separator` would reject it. Emission still goes through
/// the window, which preserves the indent bytes. This reproduces the old
/// grid path's `stripped[dispatch] = prefix.strip(...)` override, but
/// lazily.
///
/// The single field is a borrow of the wrapped window; the view owns no
/// line data of its own.
pub(crate) struct UniformStripView<'s, 'a, 'p>(&'s StrippedLines<'a, 'p>);
65
/// Applies the window's container prefix strip to the raw line at every
/// index, so the dispatch line is treated like any other line.
impl<'s, 'a, 'p> LineView for UniformStripView<'s, 'a, 'p> {
    // Indexes the raw buffer directly: panics if `i` is out of bounds.
    fn line(&self, i: usize) -> &str {
        self.0.prefix().strip(self.0.raw()[i])
    }
    fn line_count(&self) -> usize {
        self.0.raw().len()
    }
}
74
/// Horizontal alignment of a table column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Left-aligned column.
    Left,
    /// Right-aligned column.
    Right,
    /// Centered column.
    Center,
    /// No explicit alignment; also the initial value of every freshly
    /// extracted column and the value used when there is no header text.
    Default,
}
82
/// Column information extracted from the separator line.
///
/// One `Column` is produced per run of dashes; `start..end` is the half-open
/// span of that run within the separator line.
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index) in the line (inclusive)
    start: usize,
    /// End position (byte index) in the line (exclusive)
    end: usize,
    /// Column alignment; `Alignment::Default` until `determine_alignments`
    /// assigns a value
    alignment: Alignment,
}
93
94/// Try to detect if a line is a table separator line.
95/// Returns Some(column positions) if it's a valid separator.
96pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
97    let trimmed = line.trim_start();
98    // Strip trailing newline if present (CRLF or LF)
99    let (trimmed, newline_str) = strip_newline(trimmed);
100    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
101
102    // Must have leading spaces <= 3 to not be a code block
103    if leading_spaces > 3 {
104        return None;
105    }
106
107    // Simple tables only use dashed separators.
108    if trimmed.contains('*') || trimmed.contains('_') {
109        return None;
110    }
111
112    // Must contain at least one dash
113    if !trimmed.contains('-') {
114        return None;
115    }
116
117    // A separator line consists of dashes and spaces
118    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
119        return None;
120    }
121
122    // Must not be a horizontal rule.
123    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
124    if dash_groups.len() <= 1 {
125        return None;
126    }
127
128    // Extract column positions from dash groups
129    let columns = extract_columns(trimmed, leading_spaces);
130
131    if columns.is_empty() {
132        return None;
133    }
134
135    Some(columns)
136}
137
138/// Extract column positions from a separator line.
139fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
140    let mut columns = Vec::new();
141    let mut in_dashes = false;
142    let mut col_start = 0;
143
144    for (i, ch) in separator.char_indices() {
145        match ch {
146            '-' if !in_dashes => {
147                col_start = i + offset;
148                in_dashes = true;
149            }
150            ' ' if in_dashes => {
151                columns.push(Column {
152                    start: col_start,
153                    end: i + offset,
154                    alignment: Alignment::Default, // Will be determined later
155                });
156                in_dashes = false;
157            }
158            _ => {}
159        }
160    }
161
162    // Handle last column
163    if in_dashes {
164        columns.push(Column {
165            start: col_start,
166            end: separator.len() + offset,
167            alignment: Alignment::Default,
168        });
169    }
170
171    columns
172}
173
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Column boundaries are taken from ASCII separator lines, where character
/// and byte offsets coincide. Header and data rows may hold multibyte
/// characters, so offsets have to be translated before slicing those rows.
/// An offset at or past the end of the line maps to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
184
/// Try to parse a table caption marker at the start of `line`.
///
/// Recognized markers are `Table:`, `table:`, and a bare `:` that is followed
/// by whitespace (Pandoc-style; the whitespace requirement keeps fenced-div
/// fences such as `:::` from being mistaken for captions). Lines indented by
/// more than three bytes of whitespace are rejected as code blocks.
///
/// Returns `Some((prefix_len, rest))`, where `prefix_len` counts the leading
/// whitespace plus the marker and `rest` is everything after the marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    // Keyword markers need no following whitespace.
    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare colon only counts when whitespace follows it.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
213
/// Check if a line could be the start of a table caption.
///
/// True exactly when `try_parse_caption_prefix` recognizes a caption marker
/// (`Table:`, `table:`, or `:` followed by whitespace) on the line.
fn is_table_caption_start(line: &str) -> bool {
    try_parse_caption_prefix(line).is_some()
}
218
/// Check whether a line begins (after leading whitespace) with exactly one
/// colon, i.e. a `:` that is not immediately followed by another `:`.
///
/// This is a purely lexical test; it does not require whitespace after the
/// colon.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let body = line.trim_start();
    matches!(body.strip_prefix(':'), Some(after) if !after.starts_with(':'))
}
223
224fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
225    let Some((_, rest)) = try_parse_caption_prefix(line) else {
226        return false;
227    };
228    let trimmed = rest.trim_start();
229    trimmed.starts_with("```") || trimmed.starts_with("~~~")
230}
231
/// Check whether a line is a fenced-div fence: optional leading whitespace,
/// three or more colons, and then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    // Colons are single-byte, so the length difference is the colon count.
    let colon_count = body.len() - after_colons.len();

    colon_count >= 3
        && (after_colons.is_empty() || after_colons.starts_with(char::is_whitespace))
}
241
242fn is_valid_caption_start_before_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
243    if !is_table_caption_start(lines.line(pos)) {
244        return false;
245    }
246
247    if is_bare_colon_caption_start(lines.line(pos))
248        && bare_colon_caption_looks_like_definition_code_block(lines.line(pos))
249    {
250        return false;
251    }
252
253    // Avoid stealing definition-list definitions (":   ...") as table captions.
254    if is_bare_colon_caption_start(lines.line(pos))
255        && pos > 0
256        && !lines.line(pos - 1).trim().is_empty()
257        && !line_is_fenced_div_fence(lines.line(pos - 1))
258    {
259        return false;
260    }
261    true
262}
263
/// Check if a line could be the start of a grid table.
/// Grid tables start with a separator line like +---+---+ or +===+===+
///
/// True exactly when `try_parse_grid_separator` accepts the line.
fn is_grid_table_start(line: &str) -> bool {
    try_parse_grid_separator(line).is_some()
}
269
/// Check if a line could be the start of a multiline table.
/// Multiline tables start with either:
/// - A full-width dash separator (----)
/// - A column separator with dashes and spaces (---- ---- ----)
///
/// True when either `try_parse_multiline_separator` or `is_column_separator`
/// accepts the line.
fn is_multiline_table_start(line: &str) -> bool {
    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
}
277
278/// Check if there's a table following a potential caption at this position.
279/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
280pub(crate) fn is_caption_followed_by_table(
281    lines: &(impl LineView + ?Sized),
282    caption_pos: usize,
283) -> bool {
284    if caption_pos >= lines.line_count() {
285        return false;
286    }
287
288    // Caption must start with a caption prefix
289    if !is_valid_caption_start_before_table(lines, caption_pos) {
290        return false;
291    }
292
293    let mut pos = caption_pos + 1;
294
295    // Skip continuation lines of caption (non-blank lines).
296    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
297    // must not be folded into the caption.
298    while pos < lines.line_count()
299        && !lines.line(pos).trim().is_empty()
300        && !line_is_fenced_div_fence(lines.line(pos))
301    {
302        // If we hit a table separator, we found a table
303        if try_parse_table_separator(lines.line(pos)).is_some() {
304            return true;
305        }
306        pos += 1;
307    }
308
309    // Skip one blank line
310    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
311        pos += 1;
312    }
313
314    // Check for a table grid at the next position.
315    table_grid_starts_at(lines, pos)
316}
317
318/// Cheap lookahead: does any table kind's grid begin at absolute line `pos`?
319///
320/// This is the lightweight twin of the block dispatcher's `first_kind_at`,
321/// which answers the same "is there a table here?" question by attempting a
322/// full parse of each kind in turn. We deliberately do **not** call that from
323/// the caption lookahead: caption detection runs at every block start, and a
324/// full per-kind parse there would reintroduce the O(n²) blowup the bounded
325/// separator probe exists to avoid. To keep the two predicates in agreement,
326/// this calls the same primitive separator detectors the real parsers gate on
327/// (`is_grid_table_start` → `try_parse_grid_separator`, `is_multiline_table_start`
328/// → `try_parse_multiline_separator`/`is_column_separator`,
329/// `try_parse_table_separator`, `try_parse_pipe_separator`).
330fn table_grid_starts_at(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
331    if pos >= lines.line_count() {
332        return false;
333    }
334    let line = lines.line(pos);
335
336    // Grid table start (`+---+---+` or `+===+===+`).
337    if is_grid_table_start(line) {
338        return true;
339    }
340
341    // Multiline table start (`----` or `---- ---- ----`).
342    if is_multiline_table_start(line) {
343        return true;
344    }
345
346    // Separator line (simple/pipe table, headerless).
347    if try_parse_table_separator(line).is_some() {
348        return true;
349    }
350
351    // Header line followed by a separator (simple/pipe table with header).
352    if pos + 1 < lines.line_count() && !line.trim().is_empty() {
353        let next_line = lines.line(pos + 1);
354        if try_parse_table_separator(next_line).is_some()
355            || try_parse_pipe_separator(next_line).is_some()
356        {
357            return true;
358        }
359    }
360
361    false
362}
363
364fn caption_range_starting_at(
365    lines: &(impl LineView + ?Sized),
366    start: usize,
367) -> Option<(usize, usize)> {
368    if start >= lines.line_count() || !is_table_caption_start(lines.line(start)) {
369        return None;
370    }
371    let mut end = start + 1;
372    while end < lines.line_count()
373        && !lines.line(end).trim().is_empty()
374        && !line_is_fenced_div_fence(lines.line(end))
375    {
376        end += 1;
377    }
378    Some((start, end))
379}
380
381/// Find caption before table (if any).
382/// Returns (caption_start, caption_end) positions, or None.
383fn find_caption_before_table(
384    lines: &(impl LineView + ?Sized),
385    table_start: usize,
386) -> Option<(usize, usize)> {
387    if table_start == 0 {
388        return None;
389    }
390
391    // Look backward for a caption
392    // Caption must be immediately before table (with possible blank line between)
393    let mut pos = table_start - 1;
394
395    // Skip one blank line if present
396    if lines.line(pos).trim().is_empty() {
397        if pos == 0 {
398            return None;
399        }
400        pos -= 1;
401    }
402
403    // Now pos points to the last non-blank line before the table
404    // This could be the last line of a multiline caption, or a single-line caption
405    let caption_end = pos + 1; // End is exclusive
406
407    // If this line is NOT a caption start, it might be a continuation line
408    // Scan backward through non-blank lines to find the caption start
409    if !is_valid_caption_start_before_table(lines, pos) {
410        // Not a caption start - check if there's a caption start above
411        let mut scan_pos = pos;
412        while scan_pos > 0 {
413            scan_pos -= 1;
414            let line = lines.line(scan_pos);
415
416            // If we hit a blank line or fenced-div fence, we've gone too far
417            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
418                return None;
419            }
420
421            // If we find a caption start, this is the beginning of the multiline caption
422            if is_valid_caption_start_before_table(lines, scan_pos) {
423                if scan_pos > 0 && !lines.line(scan_pos - 1).trim().is_empty() {
424                    return None;
425                }
426                if previous_nonblank_looks_like_table(lines, scan_pos) {
427                    return None;
428                }
429                return Some((scan_pos, caption_end));
430            }
431        }
432        // Scanned to beginning without finding caption start
433        None
434    } else {
435        if pos > 0 && !lines.line(pos - 1).trim().is_empty() {
436            return None;
437        }
438        if previous_nonblank_looks_like_table(lines, pos) {
439            return None;
440        }
441        // This line is a caption start - return the range
442        Some((pos, caption_end))
443    }
444}
445
446fn previous_nonblank_looks_like_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
447    if pos == 0 {
448        return false;
449    }
450    let mut i = pos;
451    while i > 0 {
452        i -= 1;
453        let line = lines.line(i).trim();
454        if line.is_empty() {
455            continue;
456        }
457        return line_looks_like_table_syntax(line);
458    }
459    false
460}
461
462fn line_looks_like_table_syntax(line: &str) -> bool {
463    if line.starts_with('|') && line.matches('|').count() >= 2 {
464        return true;
465    }
466    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
467        return true;
468    }
469    try_parse_table_separator(line).is_some()
470        || try_parse_pipe_separator(line).is_some()
471        || try_parse_grid_separator(line).is_some()
472}
473
474/// Find caption after table (if any).
475/// Returns (caption_start, caption_end) positions, or None.
476fn find_caption_after_table(
477    lines: &(impl LineView + ?Sized),
478    table_end: usize,
479) -> Option<(usize, usize)> {
480    if table_end >= lines.line_count() {
481        return None;
482    }
483
484    let mut pos = table_end;
485
486    // Skip one blank line if present
487    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
488        pos += 1;
489    }
490
491    if pos >= lines.line_count() {
492        return None;
493    }
494
495    // Check if this line is a caption
496    if is_table_caption_start(lines.line(pos)) {
497        let caption_start = pos;
498        // Find end of caption (continues until blank line or fenced-div fence)
499        let mut caption_end = caption_start + 1;
500        while caption_end < lines.line_count()
501            && !lines.line(caption_end).trim().is_empty()
502            && !line_is_fenced_div_fence(lines.line(caption_end))
503        {
504            caption_end += 1;
505        }
506        Some((caption_start, caption_end))
507    } else {
508        None
509    }
510}
511
512/// Emit a table caption node.
513/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
514/// the text ends with a balanced `{...}` block, lift it into a structural
515/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
516/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
517/// the id).
518fn emit_caption_line_text(
519    builder: &mut GreenNodeBuilder<'static>,
520    text_with_newline: &str,
521    config: &ParserOptions,
522    lift_trailing_attrs: bool,
523) {
524    let (text, newline_str) = strip_newline(text_with_newline);
525
526    if lift_trailing_attrs
527        && !text.is_empty()
528        && let Some((_attrs, before_attrs, start_brace_pos)) =
529            try_parse_trailing_attributes_with_pos(text)
530    {
531        let trimmed_len = text.trim_end().len();
532        let space = &text[before_attrs.len()..start_brace_pos];
533        let raw_attrs = &text[start_brace_pos..trimmed_len];
534        let trailing_ws = &text[trimmed_len..];
535
536        if !before_attrs.is_empty() {
537            inline_emission::emit_inlines(builder, before_attrs, config, false);
538        }
539        if !space.is_empty() {
540            builder.token(SyntaxKind::WHITESPACE.into(), space);
541        }
542        emit_attribute_node(builder, raw_attrs);
543        if !trailing_ws.is_empty() {
544            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
545        }
546        if !newline_str.is_empty() {
547            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
548        }
549        return;
550    }
551
552    if !text.is_empty() {
553        inline_emission::emit_inlines(builder, text, config, false);
554    }
555    if !newline_str.is_empty() {
556        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
557    }
558}
559
560/// Emit the blank (container-only) lines in the absolute range `[from, to)` as
561/// `BLANK_LINE` nodes. Re-emits each line's container prefix as tokens via the
562/// window, so a `>`-only blank line between a caption and its table inside a
563/// blockquote round-trips losslessly. Mirrors the interior blank-row emitter in
564/// `try_parse_multiline_table`. An empty range emits nothing.
565fn emit_caption_blank_lines(
566    builder: &mut GreenNodeBuilder<'static>,
567    window: &StrippedLines<'_, '_>,
568    from: usize,
569    to: usize,
570) {
571    for abs in from..to {
572        // `window.line` is the container-stripped view, so a `>`-only line reads
573        // as blank.
574        if window.line(abs).trim().is_empty() {
575            builder.start_node(SyntaxKind::BLANK_LINE.into());
576            let tail = window.emit_or_dispatch_tail(builder, abs);
577            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
578            builder.finish_node();
579        }
580    }
581}
582
583fn emit_table_caption(
584    builder: &mut GreenNodeBuilder<'static>,
585    window: &StrippedLines<'_, '_>,
586    start: usize,
587    end: usize,
588    config: &ParserOptions,
589) {
590    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
591
592    let last_idx = (end - start).saturating_sub(1);
593
594    for (i, abs) in (start..end).enumerate() {
595        let lift_attrs = i == last_idx;
596
597        // Re-emit this caption line's container prefix (`>`/whitespace) as
598        // tokens — except the dispatch line, whose prefix the core already
599        // emitted — and operate on the stripped `tail`, so the caption prefix
600        // (`Table:`/`:`) is recognized inside a blockquote or list rather than
601        // swallowed into the caption text (which doubled the marker and broke
602        // losslessness).
603        let tail = window.emit_or_dispatch_tail(builder, abs);
604
605        if i == 0 {
606            // First line - parse and emit prefix separately
607            let trimmed = tail.trim_start();
608            let leading_ws_len = tail.len() - trimmed.len();
609
610            // Emit leading whitespace if present
611            if leading_ws_len > 0 {
612                builder.token(SyntaxKind::WHITESPACE.into(), &tail[..leading_ws_len]);
613            }
614
615            // Check for caption prefix and emit separately
616            // Calculate where the prefix ends (after trimmed content)
617            let prefix_and_rest = if tail.ends_with('\n') {
618                &tail[leading_ws_len..tail.len() - 1] // Exclude newline
619            } else {
620                &tail[leading_ws_len..]
621            };
622
623            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
624                (7, "Table: ")
625            } else if prefix_and_rest.starts_with("table: ") {
626                (7, "table: ")
627            } else if prefix_and_rest.starts_with(": ") {
628                (2, ": ")
629            } else if prefix_and_rest.starts_with(':') {
630                (1, ":")
631            } else {
632                (0, "")
633            };
634
635            if prefix_len > 0 {
636                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
637
638                // Emit rest of line after prefix
639                let rest_start = leading_ws_len + prefix_len;
640                if rest_start < tail.len() {
641                    emit_caption_line_text(builder, &tail[rest_start..], config, lift_attrs);
642                }
643            } else {
644                // No recognized prefix, emit whole trimmed line
645                emit_caption_line_text(builder, &tail[leading_ws_len..], config, lift_attrs);
646            }
647        } else {
648            // Continuation lines - emit with inline parsing (attrs only on last line).
649            emit_caption_line_text(builder, tail, config, lift_attrs);
650        }
651    }
652
653    builder.finish_node(); // TABLE_CAPTION
654}
655
656/// Emit a table cell with inline content parsing.
657/// This is the core helper for Phase 7.1 table inline parsing migration.
658fn emit_table_cell(
659    builder: &mut GreenNodeBuilder<'static>,
660    cell_text: &str,
661    config: &ParserOptions,
662) {
663    builder.start_node(SyntaxKind::TABLE_CELL.into());
664
665    // Parse inline content within the cell
666    if !cell_text.is_empty() {
667        inline_emission::emit_inlines(builder, cell_text, config, false);
668    }
669
670    builder.finish_node(); // TABLE_CELL
671}
672
673/// Determine column alignments based on separator and optional header.
674fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
675    for col in columns.iter_mut() {
676        let sep_slice = &separator_line[col.start..col.end];
677
678        if let Some(header) = header_line {
679            let header_start = column_offset_to_byte_index(header, col.start);
680            let header_end = column_offset_to_byte_index(header, col.end);
681
682            // Extract header text for this column
683            let header_text = if header_start < header_end {
684                header[header_start..header_end].trim()
685            } else if header_start < header.len() {
686                header[header_start..].trim()
687            } else {
688                ""
689            };
690
691            if header_text.is_empty() {
692                col.alignment = Alignment::Default;
693                continue;
694            }
695
696            // Find where the header text starts and ends within the column
697            let header_in_col = &header[header_start..header_end];
698            let text_start = header_in_col.len() - header_in_col.trim_start().len();
699            let text_end = header_in_col.trim_end().len() + text_start;
700
701            // Check dash alignment relative to text
702            let dashes_start = 0; // Dashes start at beginning of sep_slice
703            let dashes_end = sep_slice.len();
704
705            let flush_left = dashes_start == text_start;
706            let flush_right = dashes_end == text_end;
707
708            col.alignment = match (flush_left, flush_right) {
709                (true, true) => Alignment::Default,
710                (true, false) => Alignment::Left,
711                (false, true) => Alignment::Right,
712                (false, false) => Alignment::Center,
713            };
714        } else {
715            // Without header, alignment based on first row (we'll handle this later)
716            col.alignment = Alignment::Default;
717        }
718    }
719}
720
/// Try to parse a simple table starting at the given position.
/// Returns the number of lines consumed if successful.
///
/// The table's separator must be on the dispatch line (`window.pos()`,
/// headerless table) or on the line directly after it (table with a one-line
/// header). The table body runs to the first blank line; a caption directly
/// before or after the table (at most one blank line away) is folded into
/// the `SIMPLE_TABLE` node. Nothing is written to `builder` unless the table
/// is accepted.
///
/// Returns `None` when no separator is found in either position or when the
/// separator has no data rows beneath it. The returned count includes caption
/// lines; with a leading caption it is measured from the caption's first
/// line, which lies before the dispatch line.
pub(crate) fn try_parse_simple_table(
    window: &StrippedLines<'_, '_>,
    builder: &mut GreenNodeBuilder<'static>,
    config: &ParserOptions,
) -> Option<usize> {
    // `lines` is only used for its length; all content reads go through the
    // stripped `window` view.
    let lines = window.raw();
    let start_pos = window.pos();
    log::trace!("try_parse_simple_table at line {}", start_pos + 1);

    if start_pos >= lines.len() {
        return None;
    }

    // Cheap gate: a simple table's separator must sit on the dispatch line or
    // the line just after it (see `find_separator_line`). Table detection
    // runs at every block start, so peek just those one or two lines via
    // `strip_at` and bail out early for the common case of a prose/math
    // paragraph that cannot be a table.
    let gate_first = window.strip_at(start_pos);
    let separator_here = try_parse_table_separator(gate_first).is_some();
    let separator_next = !separator_here
        && start_pos + 1 < lines.len()
        && !gate_first.trim().is_empty()
        && try_parse_table_separator(window.strip_at(start_pos + 1)).is_some();
    if !separator_here && !separator_next {
        return None;
    }

    // Detection scans read the container-prefix-stripped view lazily through
    // the window (see `LineView`): a table nested in `list → blockquote`
    // (e.g. `- >  a   b`) has its `  > ` prefix removed before the
    // separator/column-shape checks. With an empty prefix the stripped view
    // equals the raw lines. Scans stop at the first blank line, so only a
    // bounded range is ever stripped. Emission re-emits the prefix bytes as
    // tokens via the window; the caption lookups below read through the same
    // window.

    // Look for a separator line
    let separator_pos = find_separator_line(window, start_pos)?;
    log::trace!("  found separator at line {}", separator_pos + 1);

    let separator_line = window.line(separator_pos);
    let mut columns = try_parse_table_separator(separator_line)?;

    // Determine if there's a header (separator not at start)
    let has_header = separator_pos > start_pos;
    let header_line = if has_header {
        Some(window.line(separator_pos - 1))
    } else {
        None
    };

    // Determine alignments
    determine_alignments(&mut columns, separator_line, header_line);

    // Find table end (blank line or end of input)
    let end_pos = find_table_end(window, separator_pos + 1);

    // Must have at least one data row (or it's just a separator).
    // `find_table_end` never returns less than its start argument, so this
    // subtraction cannot underflow.
    let data_rows = end_pos - separator_pos - 1;

    if data_rows == 0 {
        return None;
    }

    // Check for caption before table
    let caption_before = find_caption_before_table(window, start_pos);

    // Check for caption after table (only when there is none before it)
    let caption_after = if caption_before.is_some() {
        None
    } else {
        find_caption_after_table(window, end_pos)
    };

    // Build the table
    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());

    // Emit caption before if present
    if let Some((cap_start, cap_end)) = caption_before {
        emit_table_caption(builder, window, cap_start, cap_end, config);
        // Emit blank line between caption and table if present
        emit_caption_blank_lines(builder, window, cap_end, start_pos);
    }

    // Emit header if present. On the dispatch line the core already emitted
    // the container prefix; only continuation rows re-emit it (via the window
    // inside `emit_table_row`).
    if has_header {
        emit_table_row(
            builder,
            window,
            separator_pos - 1,
            &columns,
            SyntaxKind::TABLE_HEADER,
            config,
        );
    }

    // Emit separator, re-emitting any continuation-line container prefix
    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
    emit_line_tokens(builder, separator_tail);
    builder.finish_node();

    // Emit data rows (always continuation lines)
    for idx in (separator_pos + 1)..end_pos {
        emit_table_row(
            builder,
            window,
            idx,
            &columns,
            SyntaxKind::TABLE_ROW,
            config,
        );
    }

    // Emit caption after if present
    if let Some((cap_start, cap_end)) = caption_after {
        // Emit blank line before caption if needed
        emit_caption_blank_lines(builder, window, end_pos, cap_start);
        emit_table_caption(builder, window, cap_start, cap_end, config);
    }

    builder.finish_node(); // SimpleTable

    // Calculate lines consumed (including captions): from the first emitted
    // line (caption, header, or separator) ...
    let table_start = if let Some((cap_start, _)) = caption_before {
        cap_start
    } else if has_header {
        separator_pos - 1
    } else {
        separator_pos
    };

    // ... to the line after the last emitted line (caption or last data row).
    let table_end = if let Some((_, cap_end)) = caption_after {
        cap_end
    } else {
        end_pos
    };

    let lines_consumed = table_end - table_start;

    Some(lines_consumed)
}
869
870/// Find the position of a separator line starting from pos.
871fn find_separator_line(lines: &(impl LineView + ?Sized), start_pos: usize) -> Option<usize> {
872    log::trace!("  find_separator_line from line {}", start_pos + 1);
873
874    // Check first line
875    log::trace!("    checking first line: {:?}", lines.line(start_pos));
876    if try_parse_table_separator(lines.line(start_pos)).is_some() {
877        log::trace!("    separator found at first line");
878        return Some(start_pos);
879    }
880
881    // Check second line (for table with header)
882    if start_pos + 1 < lines.line_count()
883        && !lines.line(start_pos).trim().is_empty()
884        && try_parse_table_separator(lines.line(start_pos + 1)).is_some()
885    {
886        return Some(start_pos + 1);
887    }
888    None
889}
890
891/// Find where the table ends (first blank line or end of input).
892fn find_table_end(lines: &(impl LineView + ?Sized), start_pos: usize) -> usize {
893    for i in start_pos..lines.line_count() {
894        if lines.line(i).trim().is_empty() {
895            return i;
896        }
897        // Check if this could be a closing separator
898        if try_parse_table_separator(lines.line(i)).is_some() {
899            // Check if next line is blank or end
900            if i + 1 >= lines.line_count() || lines.line(i + 1).trim().is_empty() {
901                return i + 1;
902            }
903        }
904    }
905    lines.line_count()
906}
907
908/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
909/// Uses column boundaries from the separator line to extract cells.
910fn emit_table_row(
911    builder: &mut GreenNodeBuilder<'static>,
912    window: &StrippedLines<'_, '_>,
913    abs_idx: usize,
914    columns: &[Column],
915    row_kind: SyntaxKind,
916    config: &ParserOptions,
917) {
918    builder.start_node(row_kind.into());
919
920    // On continuation lines the leading `  > ` prefix is re-emitted as
921    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
922    // stripped tail returned; the dispatch line just strips its (already
923    // core-emitted) prefix. Empty prefix ⇒ the raw line.
924    let line = window.emit_or_dispatch_tail(builder, abs_idx);
925
926    let (line_without_newline, newline_str) = strip_newline(line);
927
928    // Emit leading whitespace if present
929    let trimmed = line_without_newline.trim_start();
930    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
931    if leading_ws_len > 0 {
932        builder.token(
933            SyntaxKind::WHITESPACE.into(),
934            &line_without_newline[..leading_ws_len],
935        );
936    }
937
938    // Track where we are in the line (for losslessness)
939    let mut current_pos = 0;
940
941    // Extract and emit cells based on column boundaries
942    for col in columns.iter() {
943        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
944        let cell_start = if col.start >= leading_ws_len {
945            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
946        } else {
947            0
948        };
949
950        let cell_end = if col.end >= leading_ws_len {
951            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
952        } else {
953            0
954        };
955
956        // Extract cell text from column bounds. When the column lies entirely
957        // before the trimmed content (col.end <= leading_ws_len) both bounds
958        // clamp to 0; treat that as an empty cell rather than re-emitting the
959        // whole row.
960        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
961            &trimmed[cell_start..cell_end]
962        } else {
963            ""
964        };
965
966        let cell_content = cell_text.trim();
967        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
968
969        // Emit any whitespace from current position to start of cell content
970        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
971        if current_pos < content_abs_pos {
972            builder.token(
973                SyntaxKind::WHITESPACE.into(),
974                &trimmed[current_pos..content_abs_pos],
975            );
976        }
977
978        // Emit cell with inline parsing
979        emit_table_cell(builder, cell_content, config);
980
981        // Update current position to end of cell content
982        current_pos = content_abs_pos + cell_content.len();
983    }
984
985    // Emit any remaining whitespace after last cell
986    if current_pos < trimmed.len() {
987        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
988    }
989
990    // Emit newline if present
991    if !newline_str.is_empty() {
992        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
993    }
994
995    builder.finish_node();
996}
997
998// ============================================================================
999// Pipe Table Parsing
1000// ============================================================================
1001
1002/// Check if a line is a pipe table separator line.
1003/// Returns the column alignments if it's a valid separator.
1004fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
1005    let trimmed = line.trim();
1006
1007    // Must contain at least one pipe
1008    if !trimmed.contains('|') && !trimmed.contains('+') {
1009        return None;
1010    }
1011
1012    // Split by pipes (or + for orgtbl variant)
1013    let cells: Vec<&str> = if trimmed.contains('+') {
1014        // Orgtbl variant: use + as separator in separator line
1015        trimmed.split(['|', '+']).collect()
1016    } else {
1017        trimmed.split('|').collect()
1018    };
1019
1020    let mut alignments = Vec::new();
1021
1022    for cell in cells {
1023        let cell = cell.trim();
1024
1025        // Skip empty cells (from leading/trailing pipes)
1026        if cell.is_empty() {
1027            continue;
1028        }
1029
1030        // Must be dashes with optional colons
1031        let starts_colon = cell.starts_with(':');
1032        let ends_colon = cell.ends_with(':');
1033
1034        // Remove colons to check if rest is all dashes
1035        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
1036
1037        // Must have at least one dash
1038        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
1039            return None;
1040        }
1041
1042        // Determine alignment from colon positions
1043        let alignment = match (starts_colon, ends_colon) {
1044            (true, true) => Alignment::Center,
1045            (true, false) => Alignment::Left,
1046            (false, true) => Alignment::Right,
1047            (false, false) => Alignment::Default,
1048        };
1049
1050        alignments.push(alignment);
1051    }
1052
1053    // Must have at least one column
1054    if alignments.is_empty() {
1055        None
1056    } else {
1057        Some(alignments)
1058    }
1059}
1060
/// Split a pipe table row into its trimmed cell texts.
///
/// A single leading pipe is dropped, and a trailing pipe does not produce an
/// extra empty cell. A backslash escapes whatever character follows it, so
/// `\|` stays inside its cell while `\\|` is an escaped backslash followed
/// by a real column separator. This matches the escape handling used when
/// the row is emitted. Escape sequences are kept verbatim in the cell text.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // The leading pipe (if any) opens the first cell rather than closing one.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and the character it escapes together
                // so an escaped pipe (or backslash) never splits the cell.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                // Unescaped pipe: close the current cell and start the next.
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // The remainder is empty when the row ended with a pipe; only a
    // non-empty remainder is a real last cell.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
1108
1109/// Emit a pipe table row with inline-parsed cells.
1110/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
1111fn emit_pipe_table_row(
1112    builder: &mut GreenNodeBuilder<'static>,
1113    window: &StrippedLines<'_, '_>,
1114    abs_idx: usize,
1115    row_kind: SyntaxKind,
1116    config: &ParserOptions,
1117) {
1118    builder.start_node(row_kind.into());
1119
1120    // On continuation lines (separator/data rows under a list+blockquote
1121    // container) the leading `  > ` prefix is not consumed by the core;
1122    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1123    // and returns the stripped tail. On the dispatch line the core already
1124    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1125    // With an empty prefix (non-nested tables) both are no-ops returning
1126    // the raw line.
1127    let line = if abs_idx == window.dispatch_pos() {
1128        window.dispatch_tail()
1129    } else {
1130        window.emit_prefix_at(builder, abs_idx)
1131    };
1132
1133    let (line_without_newline, newline_str) = strip_newline(line);
1134    let trimmed = line_without_newline.trim();
1135
1136    // Parse cell boundaries
1137    let mut cell_starts = Vec::new();
1138    let mut cell_ends = Vec::new();
1139    let mut in_escape = false;
1140
1141    // Find all pipe positions (excluding escaped ones)
1142    let mut pipe_positions = Vec::new();
1143    for (i, ch) in trimmed.char_indices() {
1144        if in_escape {
1145            in_escape = false;
1146            continue;
1147        }
1148        if ch == '\\' {
1149            in_escape = true;
1150            continue;
1151        }
1152        if ch == '|' {
1153            pipe_positions.push(i);
1154        }
1155    }
1156
1157    // Determine cell boundaries based on pipe positions
1158    if pipe_positions.is_empty() {
1159        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1160        cell_starts.push(0);
1161        cell_ends.push(trimmed.len());
1162    } else {
1163        // Check if line starts with pipe
1164        let start_pipe = pipe_positions.first() == Some(&0);
1165        // Check if line ends with pipe
1166        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1167
1168        if start_pipe {
1169            // Skip first pipe
1170            for i in 1..pipe_positions.len() {
1171                cell_starts.push(pipe_positions[i - 1] + 1);
1172                cell_ends.push(pipe_positions[i]);
1173            }
1174            // Add last cell if there's no trailing pipe
1175            if !end_pipe {
1176                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1177                cell_ends.push(trimmed.len());
1178            }
1179        } else {
1180            // No leading pipe
1181            cell_starts.push(0);
1182            cell_ends.push(pipe_positions[0]);
1183
1184            for i in 1..pipe_positions.len() {
1185                cell_starts.push(pipe_positions[i - 1] + 1);
1186                cell_ends.push(pipe_positions[i]);
1187            }
1188
1189            // Add last cell if there's no trailing pipe
1190            if !end_pipe {
1191                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1192                cell_ends.push(trimmed.len());
1193            }
1194        }
1195    }
1196
1197    // Emit leading whitespace if present (before trim)
1198    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1199    if leading_ws_len > 0 {
1200        builder.token(
1201            SyntaxKind::WHITESPACE.into(),
1202            &line_without_newline[..leading_ws_len],
1203        );
1204    }
1205
1206    // Emit cells with pipes
1207    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1208        // Emit pipe before cell (except for first cell if no leading pipe)
1209        if *start > 0 {
1210            builder.token(SyntaxKind::TEXT.into(), "|");
1211        } else if idx == 0 && trimmed.starts_with('|') {
1212            // Leading pipe
1213            builder.token(SyntaxKind::TEXT.into(), "|");
1214        }
1215
1216        // Get cell content with its whitespace
1217        let cell_with_ws = &trimmed[*start..*end];
1218        let cell_content = cell_with_ws.trim();
1219
1220        // Emit leading whitespace within cell
1221        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1222        if !cell_leading_ws.is_empty() {
1223            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1224        }
1225
1226        // Emit cell with inline parsing
1227        emit_table_cell(builder, cell_content, config);
1228
1229        // Emit trailing whitespace within cell
1230        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1231        if cell_trailing_ws_start < cell_with_ws.len() {
1232            builder.token(
1233                SyntaxKind::WHITESPACE.into(),
1234                &cell_with_ws[cell_trailing_ws_start..],
1235            );
1236        }
1237    }
1238
1239    // Emit trailing pipe if present
1240    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1241        builder.token(SyntaxKind::TEXT.into(), "|");
1242    }
1243
1244    // Emit trailing whitespace after trim (before newline)
1245    let trailing_ws_start = leading_ws_len + trimmed.len();
1246    if trailing_ws_start < line_without_newline.len() {
1247        builder.token(
1248            SyntaxKind::WHITESPACE.into(),
1249            &line_without_newline[trailing_ws_start..],
1250        );
1251    }
1252
1253    // Emit newline
1254    if !newline_str.is_empty() {
1255        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1256    }
1257
1258    builder.finish_node();
1259}
1260
1261/// Try to parse a pipe table starting at the given position.
1262/// Returns the number of lines consumed if successful.
1263pub(crate) fn try_parse_pipe_table(
1264    window: &StrippedLines<'_, '_>,
1265    builder: &mut GreenNodeBuilder<'static>,
1266    config: &ParserOptions,
1267) -> Option<usize> {
1268    let lines = window.raw();
1269    let start_pos = window.pos();
1270    if start_pos + 1 >= lines.len() {
1271        return None;
1272    }
1273
1274    // Cheap gate: a pipe table's first line must contain a `|` (it is either
1275    // the header or, headerless, the delimiter row), unless this is a
1276    // caption-led table. Table detection runs at every block start, so doing
1277    // any per-line work for every prose/math paragraph was quadratic on large
1278    // documents. Peek the dispatch line and run the (bounded) caption probe on
1279    // the same stripped `window` the detection below uses, so the gate applies
1280    // inside containers (blockquote/list) too — not just at top level.
1281    if !window.strip_at(start_pos).contains('|') && !is_caption_followed_by_table(window, start_pos)
1282    {
1283        return None;
1284    }
1285
1286    // Detection scans read the container-prefix-stripped view lazily through
1287    // the window (see `LineView`), so a table nested in `list → blockquote`
1288    // (e.g. `- > | a | b |`) has its `  > ` prefix removed before the
1289    // separator/cell shape checks. The dispatch line uses the emission-safe
1290    // line-0 strip (its prefix was consumed by the core); every other line
1291    // gets the full continuation strip. Scans stop at the first blank line, so
1292    // only a bounded range is stripped. Emission still reads raw `lines` so the
1293    // prefix bytes can be re-emitted as tokens.
1294
1295    // Check if this line is a caption followed by a table
1296    // If so, the actual table starts after the caption and blank line
1297    let (actual_start, caption_before) = if is_caption_followed_by_table(window, start_pos) {
1298        let (cap_start, cap_end) = caption_range_starting_at(window, start_pos)?;
1299        let mut pos = cap_end;
1300        while pos < window.line_count() && window.line(pos).trim().is_empty() {
1301            pos += 1;
1302        }
1303        (pos, Some((cap_start, cap_end)))
1304    } else {
1305        (start_pos, None)
1306    };
1307
1308    if actual_start + 1 >= lines.len() {
1309        return None;
1310    }
1311
1312    // First line should have pipes (potential header)
1313    if !window.line(actual_start).contains('|') {
1314        return None;
1315    }
1316
1317    // Second line should be separator
1318    let alignments = try_parse_pipe_separator(window.line(actual_start + 1))?;
1319
1320    // Parse header cells
1321    let header_cells = parse_pipe_table_row(window.line(actual_start));
1322
1323    // Number of columns should match (approximately - be lenient)
1324    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1325        // Only fail if very different
1326        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1327            return None;
1328        }
1329    }
1330
1331    // Find table end (first blank line or end of input)
1332    let mut end_pos = actual_start + 2;
1333    while end_pos < window.line_count() {
1334        let line = window.line(end_pos);
1335        if line.trim().is_empty() {
1336            break;
1337        }
1338        // Row should have pipes
1339        if !line.contains('|') {
1340            break;
1341        }
1342        end_pos += 1;
1343    }
1344
1345    // Must have at least one data row
1346    if end_pos <= actual_start + 2 {
1347        return None;
1348    }
1349
1350    // Check for caption before table (only if we didn't already detect it)
1351    let caption_before = caption_before.or_else(|| find_caption_before_table(window, actual_start));
1352
1353    // Check for caption after table
1354    let caption_after = if caption_before.is_some() {
1355        None
1356    } else {
1357        find_caption_after_table(window, end_pos)
1358    };
1359
1360    // Build the pipe table
1361    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1362
1363    // Emit caption before if present
1364    if let Some((cap_start, cap_end)) = caption_before {
1365        emit_table_caption(builder, window, cap_start, cap_end, config);
1366        // Emit blank line between caption and table if present
1367        emit_caption_blank_lines(builder, window, cap_end, actual_start);
1368    }
1369
1370    // Emit header row with inline-parsed cells. On the dispatch line the
1371    // core already emitted the container prefix; only when the header is a
1372    // continuation line (e.g. it follows a caption-before line) do we emit
1373    // the prefix here.
1374    emit_pipe_table_row(
1375        builder,
1376        window,
1377        actual_start,
1378        SyntaxKind::TABLE_HEADER,
1379        config,
1380    );
1381
1382    // Emit separator, re-emitting any continuation-line container prefix
1383    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1384    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1385    let sep_idx = actual_start + 1;
1386    let separator_tail = if sep_idx == window.dispatch_pos() {
1387        window.dispatch_tail()
1388    } else {
1389        window.emit_prefix_at(builder, sep_idx)
1390    };
1391    emit_line_tokens(builder, separator_tail);
1392    builder.finish_node();
1393
1394    // Emit data rows with inline-parsed cells (always continuation lines)
1395    for idx in (actual_start + 2)..end_pos {
1396        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1397    }
1398
1399    // Emit caption after if present
1400    if let Some((cap_start, cap_end)) = caption_after {
1401        // Emit blank line before caption if needed
1402        emit_caption_blank_lines(builder, window, end_pos, cap_start);
1403        emit_table_caption(builder, window, cap_start, cap_end, config);
1404    }
1405
1406    builder.finish_node(); // PipeTable
1407
1408    // Calculate lines consumed
1409    let table_start = caption_before
1410        .map(|(start, _)| start)
1411        .unwrap_or(actual_start);
1412    let table_end = if let Some((_, cap_end)) = caption_after {
1413        cap_end
1414    } else {
1415        end_pos
1416    };
1417
1418    Some(table_end - table_start)
1419}
1420
1421#[cfg(test)]
1422mod tests {
1423    use super::super::container_prefix::ContainerPrefix;
1424    use super::*;
1425
1426    #[test]
1427    fn test_separator_detection() {
1428        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
1429        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
1430        assert!(try_parse_table_separator("-------").is_none()); // horizontal rule
1431        assert!(try_parse_table_separator("--- --- ---").is_some()); // table separator
1432    }
1433
1434    #[test]
1435    fn test_column_extraction() {
1436        let line = "-------     ------ ----------   -------";
1437        let columns = extract_columns(line, 0);
1438        assert_eq!(columns.len(), 4);
1439    }
1440
1441    #[test]
1442    fn test_simple_table_with_header() {
1443        let input = vec![
1444            "  Right     Left     Center     Default",
1445            "-------     ------ ----------   -------",
1446            "     12     12        12            12",
1447            "    123     123       123          123",
1448            "",
1449        ];
1450
1451        let mut builder = GreenNodeBuilder::new();
1452        let prefix = ContainerPrefix::default();
1453        let window = StrippedLines::new(&input, 0, &prefix);
1454        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1455
1456        assert!(result.is_some());
1457        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1458    }
1459
1460    #[test]
1461    fn test_headerless_table() {
1462        let input = vec![
1463            "-------     ------ ----------   -------",
1464            "     12     12        12            12",
1465            "    123     123       123          123",
1466            "",
1467        ];
1468
1469        let mut builder = GreenNodeBuilder::new();
1470        let prefix = ContainerPrefix::default();
1471        let window = StrippedLines::new(&input, 0, &prefix);
1472        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1473
1474        assert!(result.is_some());
1475        assert_eq!(result.unwrap(), 3); // sep + 2 rows
1476    }
1477
1478    #[test]
1479    fn test_caption_prefix_detection() {
1480        assert!(try_parse_caption_prefix("Table: My caption").is_some());
1481        assert!(try_parse_caption_prefix("table: My caption").is_some());
1482        assert!(try_parse_caption_prefix(": My caption").is_some());
1483        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
1484        assert!(try_parse_caption_prefix("Not a caption").is_none());
1485    }
1486
1487    #[test]
1488    fn table_grid_starts_at_matches_each_kind() {
1489        // Positives — one shape per table kind the real parsers accept.
1490        assert!(table_grid_starts_at(&["+---+---+"][..], 0)); // grid
1491        assert!(table_grid_starts_at(&["----------- -------"][..], 0)); // multiline
1492        assert!(table_grid_starts_at(&["--- --- ---"][..], 0)); // simple, headerless
1493        assert!(table_grid_starts_at(&["A | B", "| --- | --- |"][..], 0)); // pipe, header + sep
1494        assert!(table_grid_starts_at(&["A    B", "--- ---"][..], 0)); // simple, header + sep
1495        // A lone dash run is a multiline full-width separator under Pandoc (not a
1496        // thematic break), so the lookahead intentionally accepts it; the full
1497        // parser then rejects it if no rows follow.
1498        assert!(table_grid_starts_at(&["-------"][..], 0));
1499
1500        // Negatives — shapes that must not read as a table start.
1501        assert!(!table_grid_starts_at(&["just some prose"][..], 0));
1502        assert!(!table_grid_starts_at(&["# Heading"][..], 0));
1503        assert!(!table_grid_starts_at(&["```", "code", "```"][..], 0)); // code fence
1504        assert!(!table_grid_starts_at(&["only one line"][..], 1)); // out of range
1505    }
1506
1507    /// The cheap caption lookahead must agree with what the full parser does:
1508    /// when it says a table follows the caption, a table node really forms; when
1509    /// it says no table follows, none does. This guards against the lookahead
1510    /// (`table_grid_starts_at`) drifting from the real per-kind parsers.
1511    #[test]
1512    fn caption_lookahead_agrees_with_real_parse() {
1513        let with_table = ": Cap\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
1514        let lines: Vec<&str> = with_table.lines().collect();
1515        assert!(is_caption_followed_by_table(&lines[..], 0));
1516        assert!(format!("{:#?}", crate::parse(with_table, None)).contains("PIPE_TABLE"));
1517
1518        let no_table = ": Cap\n\nplain paragraph\n";
1519        let lines: Vec<&str> = no_table.lines().collect();
1520        assert!(!is_caption_followed_by_table(&lines[..], 0));
1521        assert!(!format!("{:#?}", crate::parse(no_table, None)).contains("TABLE"));
1522    }
1523
1524    /// Pandoc parses `table` before `orderedList` (but `bulletList` before
1525    /// `table`) in its `block` choice. So an ordered marker whose line is the
1526    /// header of a valid pipe table is NOT a list: the whole construct is a
1527    /// top-level table absorbing the marker as the first header cell. Bullets
1528    /// and a lone ordered marker (no delimiter) stay lists. Verified against
1529    /// pandoc 3.9 (`-f markdown -t native`).
1530    #[test]
1531    fn ordered_marker_on_pipe_table_line_is_top_level_table() {
1532        let input = "1. | a | b |\n   | - | - |\n   | 1 | 2 |\n";
1533        let tree = crate::parse(input, None);
1534        assert!(
1535            tree.descendants()
1536                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1537            "ordered marker + pipe table on the marker line should be a top-level table"
1538        );
1539        assert!(
1540            !tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1541            "it must not nest under a list"
1542        );
1543        // Lossless: the marker and the overflow cell survive in the CST.
1544        let dump = format!("{tree:#?}");
1545        assert!(
1546            dump.contains("1."),
1547            "marker text preserved as a header cell"
1548        );
1549        assert!(dump.contains('b'), "overflow cell `b` preserved (lossless)");
1550    }
1551
1552    #[test]
1553    fn lone_ordered_marker_pipe_line_is_a_list() {
1554        // No delimiter row → pandoc's `table` fails, `orderedList` catches it.
1555        let input = "1. | a | b |\n";
1556        let tree = crate::parse(input, None);
1557        assert!(
1558            tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1559            "a lone ordered marker line stays a list"
1560        );
1561        assert!(
1562            !tree
1563                .descendants()
1564                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1565            "no table without a delimiter row"
1566        );
1567    }
1568
1569    #[test]
1570    fn bullet_marker_on_pipe_table_line_stays_a_nested_table() {
1571        // Bullets already match pandoc (`BulletList -> Table`): regression guard.
1572        let input = "- | a | b |\n  | - | - |\n  | 1 | 2 |\n";
1573        let tree = crate::parse(input, None);
1574        assert!(
1575            tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1576            "bullet marker keeps the list"
1577        );
1578        assert!(
1579            tree.descendants()
1580                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1581            "with the table nested inside the list item"
1582        );
1583    }
1584
1585    #[test]
1586    fn bare_colon_fenced_code_is_not_table_caption() {
1587        let input = "Term\n: ```\n  code\n  ```\n";
1588        let tree = crate::parse(input, None);
1589
1590        assert!(
1591            tree.descendants()
1592                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1593            "should parse as definition list"
1594        );
1595        assert!(
1596            tree.descendants()
1597                .any(|node| node.kind() == SyntaxKind::CODE_BLOCK),
1598            "definition should preserve fenced code block"
1599        );
1600        assert!(
1601            !tree
1602                .descendants()
1603                .any(|node| node.kind() == SyntaxKind::TABLE_CAPTION),
1604            "fenced code definition should not be parsed as table caption"
1605        );
1606    }
1607
1608    #[test]
1609    fn bare_colon_caption_after_div_opening_is_table_caption() {
1610        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
1611        let tree = crate::parse(input, None);
1612
1613        let caption_count = tree
1614            .descendants()
1615            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
1616            .count();
1617        assert_eq!(
1618            caption_count, 2,
1619            "expected both captions to attach to tables"
1620        );
1621        assert!(
1622            !tree
1623                .descendants()
1624                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1625            "caption lines in this fenced div table layout should not parse as definition list"
1626        );
1627    }
1628
1629    #[test]
1630    fn test_table_with_caption_after() {
1631        let input = vec![
1632            "  Right     Left     Center     Default",
1633            "-------     ------ ----------   -------",
1634            "     12     12        12            12",
1635            "    123     123       123          123",
1636            "",
1637            "Table: Demonstration of simple table syntax.",
1638            "",
1639        ];
1640
1641        let mut builder = GreenNodeBuilder::new();
1642        let prefix = ContainerPrefix::default();
1643        let window = StrippedLines::new(&input, 0, &prefix);
1644        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1645
1646        assert!(result.is_some());
1647        // Should consume: header + sep + 2 rows + blank + caption
1648        assert_eq!(result.unwrap(), 6);
1649    }
1650
1651    #[test]
1652    fn test_table_with_caption_before() {
1653        let input = vec![
1654            "Table: Demonstration of simple table syntax.",
1655            "",
1656            "  Right     Left     Center     Default",
1657            "-------     ------ ----------   -------",
1658            "     12     12        12            12",
1659            "    123     123       123          123",
1660            "",
1661        ];
1662
1663        let mut builder = GreenNodeBuilder::new();
1664        let prefix = ContainerPrefix::default();
1665        let window = StrippedLines::new(&input, 2, &prefix);
1666        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1667
1668        assert!(result.is_some());
1669        // Should consume: caption + blank + header + sep + 2 rows
1670        assert_eq!(result.unwrap(), 6);
1671    }
1672
1673    #[test]
1674    fn test_caption_with_colon_prefix() {
1675        let input = vec![
1676            "  Right     Left",
1677            "-------     ------",
1678            "     12     12",
1679            "",
1680            ": Short caption",
1681            "",
1682        ];
1683
1684        let mut builder = GreenNodeBuilder::new();
1685        let prefix = ContainerPrefix::default();
1686        let window = StrippedLines::new(&input, 0, &prefix);
1687        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1688
1689        assert!(result.is_some());
1690        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1691    }
1692
1693    #[test]
1694    fn test_multiline_caption() {
1695        let input = vec![
1696            "  Right     Left",
1697            "-------     ------",
1698            "     12     12",
1699            "",
1700            "Table: This is a longer caption",
1701            "that spans multiple lines.",
1702            "",
1703        ];
1704
1705        let mut builder = GreenNodeBuilder::new();
1706        let prefix = ContainerPrefix::default();
1707        let window = StrippedLines::new(&input, 0, &prefix);
1708        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1709
1710        assert!(result.is_some());
1711        // Should consume through end of multi-line caption
1712        assert_eq!(result.unwrap(), 6);
1713    }
1714
1715    #[test]
1716    fn test_simple_table_with_multibyte_cell_content() {
1717        let input = vec![
1718            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
1719            "--------------  ------------ ------- ---------------- ----------------- ------------",
1720            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
1721            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
1722            "",
1723        ];
1724
1725        let mut builder = GreenNodeBuilder::new();
1726        let prefix = ContainerPrefix::default();
1727        let window = StrippedLines::new(&input, 0, &prefix);
1728        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1729
1730        assert!(result.is_some());
1731        assert_eq!(result.unwrap(), 4);
1732    }
1733
1734    // Pipe table tests
1735    #[test]
1736    fn test_pipe_separator_detection() {
1737        assert!(try_parse_pipe_separator("|------:|:-----|---------|:------:|").is_some());
1738        assert!(try_parse_pipe_separator("|---|---|").is_some());
1739        assert!(try_parse_pipe_separator("-----|-----:").is_some()); // No leading pipe
1740        assert!(try_parse_pipe_separator("|-----+-------|").is_some()); // Orgtbl variant
1741        assert!(try_parse_pipe_separator("not a separator").is_none());
1742    }
1743
1744    #[test]
1745    fn test_pipe_alignments() {
1746        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
1747        assert_eq!(aligns.len(), 4);
1748        assert_eq!(aligns[0], Alignment::Right);
1749        assert_eq!(aligns[1], Alignment::Left);
1750        assert_eq!(aligns[2], Alignment::Default);
1751        assert_eq!(aligns[3], Alignment::Center);
1752    }
1753
1754    #[test]
1755    fn test_parse_pipe_table_row() {
1756        let cells = parse_pipe_table_row("| Right | Left | Center |");
1757        assert_eq!(cells.len(), 3);
1758        assert_eq!(cells[0], "Right");
1759        assert_eq!(cells[1], "Left");
1760        assert_eq!(cells[2], "Center");
1761
1762        // Without leading/trailing pipes
1763        let cells2 = parse_pipe_table_row("Right | Left | Center");
1764        assert_eq!(cells2.len(), 3);
1765    }
1766
1767    #[test]
1768    fn test_basic_pipe_table() {
1769        let input = vec![
1770            "",
1771            "| Right | Left | Center |",
1772            "|------:|:-----|:------:|",
1773            "|   12  |  12  |   12   |",
1774            "|  123  |  123 |  123   |",
1775            "",
1776        ];
1777
1778        let mut builder = GreenNodeBuilder::new();
1779        let prefix = ContainerPrefix::default();
1780        let window = StrippedLines::new(&input, 1, &prefix);
1781        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1782
1783        assert!(result.is_some());
1784        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1785    }
1786
1787    #[test]
1788    fn test_pipe_table_no_edge_pipes() {
1789        let input = vec![
1790            "",
1791            "fruit| price",
1792            "-----|-----:",
1793            "apple|2.05",
1794            "pear|1.37",
1795            "",
1796        ];
1797
1798        let mut builder = GreenNodeBuilder::new();
1799        let prefix = ContainerPrefix::default();
1800        let window = StrippedLines::new(&input, 1, &prefix);
1801        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1802
1803        assert!(result.is_some());
1804        assert_eq!(result.unwrap(), 4);
1805    }
1806
1807    #[test]
1808    fn test_pipe_table_with_caption() {
1809        let input = vec![
1810            "",
1811            "| Col1 | Col2 |",
1812            "|------|------|",
1813            "| A    | B    |",
1814            "",
1815            "Table: My pipe table",
1816            "",
1817        ];
1818
1819        let mut builder = GreenNodeBuilder::new();
1820        let prefix = ContainerPrefix::default();
1821        let window = StrippedLines::new(&input, 1, &prefix);
1822        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1823
1824        assert!(result.is_some());
1825        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1826    }
1827
1828    #[test]
1829    fn test_pipe_table_with_multiline_caption_before() {
1830        let input = vec![
1831            ": (#tab:base) base R quoting",
1832            "functions",
1833            "",
1834            "| C | D |",
1835            "|---|---|",
1836            "| 3 | 4 |",
1837            "",
1838        ];
1839
1840        let mut builder = GreenNodeBuilder::new();
1841        let prefix = ContainerPrefix::default();
1842        let window = StrippedLines::new(&input, 0, &prefix);
1843        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1844
1845        assert!(result.is_some());
1846        // caption(2) + blank(1) + header + sep + row
1847        assert_eq!(result.unwrap(), 6);
1848    }
1849}
1850
1851// ============================================================================
1852// Grid Table Parsing
1853// ============================================================================
1854
/// Parse a grid table border line such as `+---+---+` or `+===+===+`.
///
/// A border must start at column 0, begin and end with `+` (trailing
/// whitespace is ignored), and every non-empty segment between `+` signs must
/// consist solely of `-` or solely of `=`, optionally wrapped in `:` alignment
/// markers. Returns one [`GridColumn`] per non-empty segment, or `None` when
/// the line is not a border or has no columns.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    // A grid border must begin at column 0 of its container content. Detection
    // runs on the container-prefix-stripped line (see `try_parse_grid_table`),
    // so any remaining leading whitespace means the border is indented relative
    // to its container -- pandoc parses that as a paragraph, not a grid table.
    if line.trim_start().len() != line.len() {
        return None;
    }

    // Peel off the outer `+` on both sides; a lone `+` fails the second strip.
    let interior = line.trim_end().strip_prefix('+')?.strip_suffix('+')?;

    let columns = interior
        .split('+')
        .filter(|segment| !segment.is_empty())
        .map(|segment| {
            // Alignment colons may wrap the fill; what remains must be a
            // homogeneous run of `-` or `=`.
            let fill = segment.trim_matches(':');
            let marker = match fill.chars().next() {
                Some(c @ ('-' | '=')) => c,
                _ => return None,
            };
            if fill.chars().any(|c| c != marker) {
                return None;
            }
            Some(GridColumn {
                is_header_separator: marker == '=',
                width: segment.chars().count(),
            })
        })
        .collect::<Option<Vec<_>>>()?;

    (!columns.is_empty()).then_some(columns)
}

/// Column information for grid tables.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the border segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the border segment, alignment colons included.
    width: usize,
}
1932
1933fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1934    let mut end_byte = start_byte;
1935    let mut display_cols = 0usize;
1936
1937    for (offset, ch) in line[start_byte..].char_indices() {
1938        if ch == '|' {
1939            let sep_byte = start_byte + offset;
1940            return (sep_byte, sep_byte + 1);
1941        }
1942        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1943        if display_cols + ch_width > width {
1944            break;
1945        }
1946        display_cols += ch_width;
1947        end_byte = start_byte + offset + ch.len_utf8();
1948        if display_cols >= width {
1949            break;
1950        }
1951    }
1952
1953    // If the width budget is exhausted before seeing a separator (for example
1954    // because of padding/layout drift), advance to the next literal separator
1955    // to keep row slicing aligned and preserve losslessness.
1956    let mut sep_byte = end_byte;
1957    while sep_byte < line.len() {
1958        let mut chars = line[sep_byte..].chars();
1959        let Some(ch) = chars.next() else {
1960            break;
1961        };
1962        if ch == '|' {
1963            return (sep_byte, sep_byte + 1);
1964        }
1965        sep_byte += ch.len_utf8();
1966    }
1967
1968    (end_byte, end_byte)
1969}
1970
/// Report whether `line` looks like a grid table content row.
///
/// After allowing at most three bytes of leading whitespace, the line must
/// start with `|` and (ignoring trailing whitespace) end with either `|`
/// (a normal row) or `+` (a spanning-style continuation line).
fn is_grid_content_row(line: &str) -> bool {
    let body = line.trim_start();
    let indent_bytes = line.len() - body.len();
    let body = body.trim_end();

    indent_bytes <= 3
        && body.starts_with('|')
        && matches!(body.chars().next_back(), Some('|' | '+'))
}
1984
1985/// Extract cell contents from a single grid table row line.
1986/// Returns a vector of cell contents (trimmed) based on column boundaries.
1987/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1988fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1989    let (line_content, _) = strip_newline(line);
1990    let line_trimmed = line_content.trim();
1991
1992    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1993        return vec![String::new(); _columns.len()];
1994    }
1995
1996    let mut cells = Vec::with_capacity(_columns.len());
1997    let mut pos_byte = 1; // Skip leading pipe
1998
1999    for col in _columns {
2000        let col_idx = cells.len();
2001        if pos_byte >= line_trimmed.len() {
2002            cells.push(String::new());
2003            continue;
2004        }
2005
2006        let start_byte = pos_byte;
2007        let end_byte = if col_idx + 1 == _columns.len() {
2008            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2009        } else {
2010            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
2011            pos_byte = next_start;
2012            end
2013        };
2014        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
2015        if col_idx + 1 == _columns.len() {
2016            pos_byte = line_trimmed.len();
2017        }
2018    }
2019
2020    cells
2021}
2022
2023/// Emit a grid table row with inline-parsed cells.
2024/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
2025/// then continuation lines as raw TEXT for losslessness.
2026fn emit_grid_table_row(
2027    builder: &mut GreenNodeBuilder<'static>,
2028    window: &StrippedLines<'_, '_>,
2029    indices: &[usize],
2030    columns: &[GridColumn],
2031    row_kind: SyntaxKind,
2032    config: &ParserOptions,
2033) {
2034    if indices.is_empty() {
2035        return;
2036    }
2037
2038    builder.start_node(row_kind.into());
2039
2040    // Emit first line with TABLE_CELL nodes. The continuation-line container
2041    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
2042    // inside the row node before the cell text; the returned tail is the
2043    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
2044    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
2045    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2046    let cell_contents = extract_grid_cells_from_line(first_line, columns);
2047    let (line_without_newline, newline_str) = strip_newline(first_line);
2048    let trimmed = line_without_newline.trim();
2049    let expected_pipe_count = columns.len().saturating_add(1);
2050    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
2051
2052    // Rows that don't contain all expected column separators (spanning-style rows)
2053    // must be emitted verbatim for losslessness. The first line's prefix was
2054    // already consumed above; emit its tail and each continuation tail.
2055    if actual_pipe_count != expected_pipe_count {
2056        emit_line_tokens(builder, first_line);
2057        for &idx in &indices[1..] {
2058            let tail = window.emit_or_dispatch_tail(builder, idx);
2059            emit_line_tokens(builder, tail);
2060        }
2061        builder.finish_node();
2062        return;
2063    }
2064
2065    // Emit leading whitespace
2066    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
2067    if leading_ws_len > 0 {
2068        builder.token(
2069            SyntaxKind::WHITESPACE.into(),
2070            &line_without_newline[..leading_ws_len],
2071        );
2072    }
2073
2074    // Emit leading pipe
2075    if trimmed.starts_with('|') {
2076        builder.token(SyntaxKind::TEXT.into(), "|");
2077    }
2078
2079    // Emit each cell based on fixed column widths from separators
2080    let mut pos_byte = 1usize; // after leading pipe
2081    for (idx, cell_content) in cell_contents.iter().enumerate() {
2082        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
2083            let start_byte = pos_byte;
2084            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
2085                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2086            } else {
2087                let (end, next_start) =
2088                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
2089                pos_byte = next_start;
2090                end
2091            };
2092            let slice = &trimmed[start_byte..end_byte];
2093            if idx + 1 == columns.len() {
2094                pos_byte = trimmed.len();
2095            }
2096            slice
2097        } else {
2098            ""
2099        };
2100
2101        // Emit leading whitespace in cell
2102        let cell_trimmed = part.trim();
2103        let ws_start_len = part.len() - part.trim_start().len();
2104        if ws_start_len > 0 {
2105            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
2106        }
2107
2108        // Emit TABLE_CELL with inline parsing
2109        emit_table_cell(builder, cell_content, config);
2110
2111        // Emit trailing whitespace in cell
2112        let ws_end_start = ws_start_len + cell_trimmed.len();
2113        if ws_end_start < part.len() {
2114            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
2115        }
2116
2117        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
2118        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
2119            builder.token(SyntaxKind::TEXT.into(), "|");
2120        }
2121    }
2122
2123    // Emit trailing whitespace before newline
2124    let trailing_ws_start = leading_ws_len + trimmed.len();
2125    if trailing_ws_start < line_without_newline.len() {
2126        builder.token(
2127            SyntaxKind::WHITESPACE.into(),
2128            &line_without_newline[trailing_ws_start..],
2129        );
2130    }
2131
2132    // Emit newline
2133    if !newline_str.is_empty() {
2134        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2135    }
2136
2137    // Emit continuation lines as TEXT for losslessness, re-emitting each
2138    // line's container prefix first.
2139    for &idx in &indices[1..] {
2140        let tail = window.emit_or_dispatch_tail(builder, idx);
2141        emit_line_tokens(builder, tail);
2142    }
2143
2144    builder.finish_node();
2145}
2146
2147/// Try to parse a grid table starting at the given position.
2148/// Returns the number of lines consumed if successful.
2149pub(crate) fn try_parse_grid_table(
2150    window: &StrippedLines<'_, '_>,
2151    builder: &mut GreenNodeBuilder<'static>,
2152    config: &ParserOptions,
2153) -> Option<usize> {
2154    let lines = window.raw();
2155    let start_pos = window.pos();
2156    if start_pos >= lines.len() {
2157        return None;
2158    }
2159
2160    // Grid-border detection reads the stripped view through `UniformStripView`,
2161    // which strips *every* line — including the dispatch line — with the full
2162    // container strip. The strict column-0 check in `try_parse_grid_separator`
2163    // would otherwise reject a `+---+` border sitting at column 0 of a list
2164    // item's inner content if the dispatch line kept its list-indent. With an
2165    // empty prefix the stripped view equals the raw lines. Emission still goes
2166    // through `window.emit_or_dispatch_tail`, which preserves the indent bytes.
2167    // Scans stop at the first blank line, so only a bounded range is stripped.
2168    let view = UniformStripView(window);
2169
2170    // Cheap gate: a grid table's first line is a grid separator (`+---+`/`+===+`),
2171    // unless this is a caption-led table. Table detection runs at every block
2172    // start, so any per-line work for every prose/math paragraph was quadratic
2173    // on large documents. Run the gate on the same `view` the detection uses, so
2174    // it applies inside containers (blockquote/list) too — not just at top level.
2175    if try_parse_grid_separator(view.line(start_pos)).is_none()
2176        && !is_caption_followed_by_table(&view, start_pos)
2177    {
2178        return None;
2179    }
2180
2181    // Check if this line is a caption followed by a table
2182    // If so, the actual table starts after the caption and blank line
2183    let (actual_start, caption_before) = if is_caption_followed_by_table(&view, start_pos) {
2184        let (cap_start, cap_end) = caption_range_starting_at(&view, start_pos)?;
2185        let mut pos = cap_end;
2186        while pos < view.line_count() && view.line(pos).trim().is_empty() {
2187            pos += 1;
2188        }
2189        (pos, Some((cap_start, cap_end)))
2190    } else {
2191        (start_pos, None)
2192    };
2193
2194    if actual_start >= lines.len() {
2195        return None;
2196    }
2197
2198    // First line must be a grid separator
2199    let first_line = view.line(actual_start);
2200    let _columns = try_parse_grid_separator(first_line)?;
2201
2202    // Track table structure
2203    let mut end_pos = actual_start + 1;
2204    let mut found_header_sep = false;
2205    let mut in_footer = false;
2206
2207    // Scan table lines
2208    while end_pos < lines.len() {
2209        let line = view.line(end_pos);
2210
2211        // Check for blank line (table ends)
2212        if line.trim().is_empty() {
2213            break;
2214        }
2215
2216        // Check for separator line
2217        if let Some(sep_cols) = try_parse_grid_separator(line) {
2218            // Check if this is a header separator (=)
2219            if sep_cols.iter().any(|c| c.is_header_separator) {
2220                if !found_header_sep {
2221                    found_header_sep = true;
2222                } else if !in_footer {
2223                    // Second = separator starts footer
2224                    in_footer = true;
2225                }
2226            }
2227            end_pos += 1;
2228            continue;
2229        }
2230
2231        // Check for content row
2232        if is_grid_content_row(line) {
2233            end_pos += 1;
2234            continue;
2235        }
2236
2237        // Not a valid grid table line - table ends
2238        break;
2239    }
2240
2241    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2242    // Or just top + content rows that end with a separator
2243    if end_pos <= actual_start + 1 {
2244        return None;
2245    }
2246
2247    // Last consumed line should be a separator for a well-formed table
2248    // But we'll be lenient and accept tables ending with content rows
2249
2250    // Check for caption before table (only if we didn't already detected it)
2251    let caption_before = caption_before.or_else(|| find_caption_before_table(&view, actual_start));
2252
2253    // Check for caption after table
2254    let caption_after = if caption_before.is_some() {
2255        None
2256    } else {
2257        find_caption_after_table(&view, end_pos)
2258    };
2259
2260    // Build the grid table
2261    builder.start_node(SyntaxKind::GRID_TABLE.into());
2262
2263    // Emit caption before if present
2264    if let Some((cap_start, cap_end)) = caption_before {
2265        emit_table_caption(builder, window, cap_start, cap_end, config);
2266        // Emit blank line between caption and table if present
2267        emit_caption_blank_lines(builder, window, cap_end, actual_start);
2268    }
2269
2270    // Track whether we've passed the header separator
2271    let mut past_header_sep = false;
2272    let mut in_footer_section = false;
2273    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2274    // each line's container prefix can be re-emitted via the window.
2275    let mut current_row_indices: Vec<usize> = Vec::new();
2276    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2277
2278    // Emit table rows - accumulate multi-line cells
2279    for idx in actual_start..end_pos {
2280        let line = view.line(idx);
2281        if let Some(sep_cols) = try_parse_grid_separator(line) {
2282            // Separator line - emit any accumulated row first
2283            if !current_row_indices.is_empty() {
2284                emit_grid_table_row(
2285                    builder,
2286                    window,
2287                    &current_row_indices,
2288                    &sep_cols,
2289                    current_row_kind,
2290                    config,
2291                );
2292                current_row_indices.clear();
2293            }
2294
2295            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2296
2297            // Re-emit any continuation-line container prefix (`  > `) as
2298            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2299            if is_header_sep {
2300                if !past_header_sep {
2301                    // This is the header/body separator
2302                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2303                    let tail = window.emit_or_dispatch_tail(builder, idx);
2304                    emit_line_tokens(builder, tail);
2305                    builder.finish_node();
2306                    past_header_sep = true;
2307                } else {
2308                    // Footer separator
2309                    if !in_footer_section {
2310                        in_footer_section = true;
2311                    }
2312                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2313                    let tail = window.emit_or_dispatch_tail(builder, idx);
2314                    emit_line_tokens(builder, tail);
2315                    builder.finish_node();
2316                }
2317            } else {
2318                // Regular separator (row boundary)
2319                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2320                let tail = window.emit_or_dispatch_tail(builder, idx);
2321                emit_line_tokens(builder, tail);
2322                builder.finish_node();
2323            }
2324        } else if is_grid_content_row(line) {
2325            // Content row - accumulate for multi-line cells
2326            current_row_kind = if !past_header_sep && found_header_sep {
2327                SyntaxKind::TABLE_HEADER
2328            } else if in_footer_section {
2329                SyntaxKind::TABLE_FOOTER
2330            } else {
2331                SyntaxKind::TABLE_ROW
2332            };
2333
2334            current_row_indices.push(idx);
2335        }
2336    }
2337
2338    // Emit any remaining accumulated row
2339    if !current_row_indices.is_empty() {
2340        // Use first separator's columns for cell boundaries
2341        if let Some(sep_cols) = try_parse_grid_separator(view.line(actual_start)) {
2342            emit_grid_table_row(
2343                builder,
2344                window,
2345                &current_row_indices,
2346                &sep_cols,
2347                current_row_kind,
2348                config,
2349            );
2350        }
2351    }
2352
2353    // Emit caption after if present
2354    if let Some((cap_start, cap_end)) = caption_after {
2355        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2356        emit_table_caption(builder, window, cap_start, cap_end, config);
2357    }
2358
2359    builder.finish_node(); // GRID_TABLE
2360
2361    // Calculate lines consumed
2362    let table_start = caption_before
2363        .map(|(start, _)| start)
2364        .unwrap_or(actual_start);
2365    let table_end = if let Some((_, cap_end)) = caption_after {
2366        cap_end
2367    } else {
2368        end_pos
2369    };
2370
2371    Some(table_end - table_start)
2372}
2373
2374#[cfg(test)]
2375mod grid_table_tests {
2376    use super::super::container_prefix::ContainerPrefix;
2377    use super::*;
2378
2379    #[test]
2380    fn test_grid_separator_detection() {
2381        assert!(try_parse_grid_separator("+---+---+").is_some());
2382        assert!(try_parse_grid_separator("+===+===+").is_some());
2383        assert!(try_parse_grid_separator("+---------------+---------------+").is_some());
2384        assert!(try_parse_grid_separator("+:---:+").is_some()); // center aligned
2385        assert!(try_parse_grid_separator("not a separator").is_none());
2386        assert!(try_parse_grid_separator("|---|---|").is_none()); // pipe table sep
2387
2388        // A grid border must sit at column 0 of its container content; an
2389        // indented border is not a grid table (matches pandoc, which parses
2390        // an indented `+---+` as a paragraph). Detection runs on the
2391        // container-stripped line, so any remaining leading space disqualifies.
2392        assert!(try_parse_grid_separator(" +---+---+").is_none());
2393        assert!(try_parse_grid_separator("  +---+---+").is_none());
2394        assert!(try_parse_grid_separator("   +===+===+").is_none());
2395    }
2396
2397    #[test]
2398    fn test_grid_header_separator() {
2399        let cols = try_parse_grid_separator("+===+===+").unwrap();
2400        assert!(cols.iter().all(|c| c.is_header_separator));
2401
2402        let cols2 = try_parse_grid_separator("+---+---+").unwrap();
2403        assert!(cols2.iter().all(|c| !c.is_header_separator));
2404    }
2405
2406    #[test]
2407    fn test_grid_content_row_detection() {
2408        assert!(is_grid_content_row("| content | content |"));
2409        assert!(is_grid_content_row("|  |  |"));
2410        assert!(is_grid_content_row("| content +------+"));
2411        assert!(!is_grid_content_row("+---+---+")); // separator, not content
2412        assert!(!is_grid_content_row("no pipes here"));
2413    }
2414
2415    #[test]
2416    fn test_basic_grid_table() {
2417        let input = vec![
2418            "+-------+-------+",
2419            "| Col1  | Col2  |",
2420            "+=======+=======+",
2421            "| A     | B     |",
2422            "+-------+-------+",
2423            "",
2424        ];
2425
2426        let mut builder = GreenNodeBuilder::new();
2427        let prefix = ContainerPrefix::default();
2428        let window = StrippedLines::new(&input, 0, &prefix);
2429        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2430
2431        assert!(result.is_some());
2432        assert_eq!(result.unwrap(), 5);
2433    }
2434
2435    #[test]
2436    fn test_grid_table_multirow() {
2437        let input = vec![
2438            "+---------------+---------------+",
2439            "| Fruit         | Advantages    |",
2440            "+===============+===============+",
2441            "| Bananas       | - wrapper     |",
2442            "|               | - color       |",
2443            "+---------------+---------------+",
2444            "| Oranges       | - scurvy      |",
2445            "|               | - tasty       |",
2446            "+---------------+---------------+",
2447            "",
2448        ];
2449
2450        let mut builder = GreenNodeBuilder::new();
2451        let prefix = ContainerPrefix::default();
2452        let window = StrippedLines::new(&input, 0, &prefix);
2453        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2454
2455        assert!(result.is_some());
2456        assert_eq!(result.unwrap(), 9);
2457    }
2458
2459    #[test]
2460    fn test_grid_table_with_footer() {
2461        let input = vec![
2462            "+-------+-------+",
2463            "| Fruit | Price |",
2464            "+=======+=======+",
2465            "| Apple | $1.00 |",
2466            "+-------+-------+",
2467            "| Pear  | $1.50 |",
2468            "+=======+=======+",
2469            "| Total | $2.50 |",
2470            "+=======+=======+",
2471            "",
2472        ];
2473
2474        let mut builder = GreenNodeBuilder::new();
2475        let prefix = ContainerPrefix::default();
2476        let window = StrippedLines::new(&input, 0, &prefix);
2477        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2478
2479        assert!(result.is_some());
2480        assert_eq!(result.unwrap(), 9);
2481    }
2482
2483    #[test]
2484    fn test_grid_table_headerless() {
2485        let input = vec![
2486            "+-------+-------+",
2487            "| A     | B     |",
2488            "+-------+-------+",
2489            "| C     | D     |",
2490            "+-------+-------+",
2491            "",
2492        ];
2493
2494        let mut builder = GreenNodeBuilder::new();
2495        let prefix = ContainerPrefix::default();
2496        let window = StrippedLines::new(&input, 0, &prefix);
2497        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2498
2499        assert!(result.is_some());
2500        assert_eq!(result.unwrap(), 5);
2501    }
2502
2503    #[test]
2504    fn test_grid_table_with_caption_before() {
2505        let input = vec![
2506            ": Sample table",
2507            "",
2508            "+-------+-------+",
2509            "| A     | B     |",
2510            "+=======+=======+",
2511            "| C     | D     |",
2512            "+-------+-------+",
2513            "",
2514        ];
2515
2516        let mut builder = GreenNodeBuilder::new();
2517        let prefix = ContainerPrefix::default();
2518        let window = StrippedLines::new(&input, 2, &prefix);
2519        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2520
2521        assert!(result.is_some());
2522        // Should include caption + blank + table
2523        assert_eq!(result.unwrap(), 7);
2524    }
2525
2526    #[test]
2527    fn test_grid_table_with_caption_after() {
2528        let input = vec![
2529            "+-------+-------+",
2530            "| A     | B     |",
2531            "+=======+=======+",
2532            "| C     | D     |",
2533            "+-------+-------+",
2534            "",
2535            "Table: My grid table",
2536            "",
2537        ];
2538
2539        let mut builder = GreenNodeBuilder::new();
2540        let prefix = ContainerPrefix::default();
2541        let window = StrippedLines::new(&input, 0, &prefix);
2542        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2543
2544        assert!(result.is_some());
2545        // table + blank + caption
2546        assert_eq!(result.unwrap(), 7);
2547    }
2548}
2549
2550// ============================================================================
2551// Multiline Table Parsing
2552// ============================================================================
2553
2554/// Check if a line is a multiline table separator (continuous dashes).
2555/// Multiline table separators span the full width and are all dashes.
2556/// Returns Some(columns) if valid, None otherwise.
2557fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2558    let trimmed = line.trim_start();
2559    let leading_spaces = line.len() - trimmed.len();
2560
2561    // Must have leading spaces <= 3 to not be a code block
2562    if leading_spaces > 3 {
2563        return None;
2564    }
2565
2566    let trimmed = trimmed.trim_end();
2567
2568    // Must be all dashes (continuous line of dashes)
2569    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2570        return None;
2571    }
2572
2573    // Must have at least 3 dashes
2574    if trimmed.len() < 3 {
2575        return None;
2576    }
2577
2578    // This is a full-width separator - columns will be determined by column separator lines
2579    Some(vec![Column {
2580        start: leading_spaces,
2581        end: leading_spaces + trimmed.len(),
2582        alignment: Alignment::Default,
2583    }])
2584}
2585
2586/// Check if a line is a column separator line for multiline tables.
2587/// Column separators have dashes with spaces between them to define columns.
2588fn is_column_separator(line: &str) -> bool {
2589    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2590}
2591
2592fn is_headerless_single_row_without_blank(
2593    lines: &(impl LineView + ?Sized),
2594    row_start: usize,
2595    row_end: usize,
2596    columns: &[Column],
2597) -> bool {
2598    if row_start >= row_end {
2599        return false;
2600    }
2601
2602    if row_end - row_start == 1 {
2603        return false;
2604    }
2605
2606    let Some(last_col) = columns.last() else {
2607        return false;
2608    };
2609
2610    for i in (row_start + 1)..row_end {
2611        let (content, _) = strip_newline(lines.line(i));
2612        let prefix_end = last_col.start.min(content.len());
2613        if !content[..prefix_end].trim().is_empty() {
2614            return false;
2615        }
2616    }
2617
2618    true
2619}
2620
2621/// Try to parse a multiline table starting at the given position.
2622/// Returns the number of lines consumed if successful.
2623pub(crate) fn try_parse_multiline_table(
2624    window: &StrippedLines<'_, '_>,
2625    builder: &mut GreenNodeBuilder<'static>,
2626    config: &ParserOptions,
2627) -> Option<usize> {
2628    let lines = window.raw();
2629    let start_pos = window.pos();
2630    if start_pos >= lines.len() {
2631        return None;
2632    }
2633
2634    // Cheap gate: a multiline table's first line is either a full-width dash
2635    // separator or a column separator. Table detection runs at every block
2636    // start, so any per-line work for every paragraph that can't begin a
2637    // multiline table was quadratic on large documents. Peek just the dispatch
2638    // line via `strip_at` and bail before any further scanning.
2639    let first_line = window.strip_at(start_pos);
2640
2641    // First line can be either:
2642    // 1. A full-width dash separator (for tables with headers)
2643    // 2. A column separator (for headerless tables)
2644    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2645    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2646    if !is_full_width_start && !is_column_sep_start {
2647        return None;
2648    }
2649
2650    // Detection scans read the container-prefix-stripped view lazily through the
2651    // window (see `LineView`) so a multiline table nested in `list → blockquote`
2652    // (e.g. `- > ----`) has its `  > ` prefix removed before the
2653    // separator/blank-row shape checks. The interior `>`-only row then strips to
2654    // `""` and registers as a blank row separator. With an empty prefix the
2655    // stripped view equals the raw lines. Scans stop at the first blank/closing
2656    // line, so only a bounded range is stripped. Emission re-emits the prefix
2657    // bytes as tokens via the window; captions read raw `lines`.
2658    let headerless_columns = if is_column_sep_start {
2659        try_parse_table_separator(window.line(start_pos))
2660    } else {
2661        None
2662    };
2663
2664    // Look ahead to find the structure
2665    let mut pos = start_pos + 1;
2666    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2667    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2668    let mut has_header = false;
2669    let mut found_blank_line = false;
2670    let mut found_closing_sep = false;
2671    let mut content_line_count = 0usize;
2672
2673    // Scan for header section and column separator
2674    while pos < lines.len() {
2675        let line = window.line(pos);
2676
2677        // Check for column separator (defines columns) - only if we started with full-width
2678        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2679            found_column_sep = true;
2680            column_sep_pos = pos;
2681            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2682            pos += 1;
2683            continue;
2684        }
2685
2686        // Check for blank line (row separator in body)
2687        if line.trim().is_empty() {
2688            found_blank_line = true;
2689            pos += 1;
2690            // Check if next line is a valid closing separator for this table shape.
2691            if pos < lines.len() {
2692                let next = window.line(pos);
2693                let is_valid_closer = if is_full_width_start {
2694                    try_parse_multiline_separator(next).is_some()
2695                } else {
2696                    is_column_separator(next)
2697                };
2698                if is_valid_closer {
2699                    found_closing_sep = true;
2700                    pos += 1; // Include the closing separator
2701                    break;
2702                }
2703            }
2704            continue;
2705        }
2706
2707        // Check for closing full-width dashes (only for full-width-start tables).
2708        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2709            found_closing_sep = true;
2710            pos += 1;
2711            break;
2712        }
2713
2714        // Check for closing column separator (for headerless tables)
2715        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2716            found_closing_sep = true;
2717            pos += 1;
2718            break;
2719        }
2720
2721        // Content row
2722        content_line_count += 1;
2723        pos += 1;
2724    }
2725
2726    // Must have found a column separator to be a valid multiline table
2727    if !found_column_sep {
2728        return None;
2729    }
2730
2731    // Must have had at least one blank line between rows (distinguishes from simple tables)
2732    if !found_blank_line {
2733        if !is_column_sep_start {
2734            return None;
2735        }
2736        let columns = headerless_columns.as_deref()?;
2737        if !is_headerless_single_row_without_blank(window, start_pos + 1, pos - 1, columns) {
2738            return None;
2739        }
2740    }
2741
2742    // Must have a closing separator
2743    if !found_closing_sep {
2744        return None;
2745    }
2746
2747    // Must have consumed more than just the opening separator
2748    if pos <= start_pos + 2 {
2749        return None;
2750    }
2751
2752    let end_pos = pos;
2753
2754    // Extract column boundaries from the separator line
2755    let columns = try_parse_table_separator(window.line(column_sep_pos))
2756        .expect("Column separator must be valid");
2757
2758    // Check for caption before table
2759    let caption_before = find_caption_before_table(window, start_pos);
2760
2761    // Check for caption after table
2762    let caption_after = if caption_before.is_some() {
2763        None
2764    } else {
2765        find_caption_after_table(window, end_pos)
2766    };
2767
2768    // Build the multiline table
2769    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2770
2771    // Emit caption before if present
2772    if let Some((cap_start, cap_end)) = caption_before {
2773        emit_table_caption(builder, window, cap_start, cap_end, config);
2774        // Emit blank line between caption and table if present
2775        emit_caption_blank_lines(builder, window, cap_end, start_pos);
2776    }
2777
2778    // Emit opening separator. The dispatch line's prefix was already consumed
2779    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
2780    // re-emits its `  > ` prefix via `emit_prefix_at`.
2781    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2782    let tail = window.emit_or_dispatch_tail(builder, start_pos);
2783    emit_line_tokens(builder, tail);
2784    builder.finish_node();
2785
2786    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
2787    // up a multi-line row so each line's container prefix can be re-emitted via
2788    // the window.
2789    let mut in_header = has_header;
2790    let mut current_row_indices: Vec<usize> = Vec::new();
2791
2792    for i in (start_pos + 1)..end_pos {
2793        let line = window.line(i);
2794        // Column separator (header/body divider)
2795        if i == column_sep_pos {
2796            // Emit any accumulated header lines
2797            if !current_row_indices.is_empty() {
2798                emit_multiline_table_row(
2799                    builder,
2800                    window,
2801                    &current_row_indices,
2802                    &columns,
2803                    SyntaxKind::TABLE_HEADER,
2804                    config,
2805                );
2806                current_row_indices.clear();
2807            }
2808
2809            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2810            let tail = window.emit_or_dispatch_tail(builder, i);
2811            emit_line_tokens(builder, tail);
2812            builder.finish_node();
2813            in_header = false;
2814            continue;
2815        }
2816
2817        // Closing separator (full-width or column separator at end)
2818        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2819            // Emit any accumulated row lines
2820            if !current_row_indices.is_empty() {
2821                let kind = if in_header {
2822                    SyntaxKind::TABLE_HEADER
2823                } else {
2824                    SyntaxKind::TABLE_ROW
2825                };
2826                emit_multiline_table_row(
2827                    builder,
2828                    window,
2829                    &current_row_indices,
2830                    &columns,
2831                    kind,
2832                    config,
2833                );
2834                current_row_indices.clear();
2835            }
2836
2837            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2838            let tail = window.emit_or_dispatch_tail(builder, i);
2839            emit_line_tokens(builder, tail);
2840            builder.finish_node();
2841            continue;
2842        }
2843
2844        // Blank line (row separator)
2845        if line.trim().is_empty() {
2846            // Emit accumulated row
2847            if !current_row_indices.is_empty() {
2848                let kind = if in_header {
2849                    SyntaxKind::TABLE_HEADER
2850                } else {
2851                    SyntaxKind::TABLE_ROW
2852                };
2853                emit_multiline_table_row(
2854                    builder,
2855                    window,
2856                    &current_row_indices,
2857                    &columns,
2858                    kind,
2859                    config,
2860                );
2861                current_row_indices.clear();
2862            }
2863
2864            // Re-emit the interior `>`-only separator row's container prefix
2865            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
2866            builder.start_node(SyntaxKind::BLANK_LINE.into());
2867            let tail = window.emit_or_dispatch_tail(builder, i);
2868            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
2869            builder.finish_node();
2870            continue;
2871        }
2872
2873        // Content line - accumulate for current row
2874        current_row_indices.push(i);
2875    }
2876
2877    // Emit any remaining accumulated lines
2878    if !current_row_indices.is_empty() {
2879        let kind = if in_header {
2880            SyntaxKind::TABLE_HEADER
2881        } else {
2882            SyntaxKind::TABLE_ROW
2883        };
2884        emit_multiline_table_row(
2885            builder,
2886            window,
2887            &current_row_indices,
2888            &columns,
2889            kind,
2890            config,
2891        );
2892    }
2893
2894    // Emit caption after if present
2895    if let Some((cap_start, cap_end)) = caption_after {
2896        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2897        emit_table_caption(builder, window, cap_start, cap_end, config);
2898    }
2899
2900    builder.finish_node(); // MultilineTable
2901
2902    // Calculate lines consumed
2903    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2904    let table_end = if let Some((_, cap_end)) = caption_after {
2905        cap_end
2906    } else {
2907        end_pos
2908    };
2909
2910    Some(table_end - table_start)
2911}
2912
2913/// Extract cell contents from first line only (for CST emission).
2914/// Multi-line content will be in continuation TEXT tokens.
2915fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2916    let (line_content, _) = strip_newline(line);
2917    let mut cells = Vec::new();
2918
2919    for column in columns.iter() {
2920        let column_start = column_offset_to_byte_index(line_content, column.start);
2921        let column_end = column_offset_to_byte_index(line_content, column.end);
2922
2923        // Extract FULL text for this column (including whitespace)
2924        let cell_text = if column_start < column_end {
2925            &line_content[column_start..column_end]
2926        } else if column_start < line_content.len() {
2927            &line_content[column_start..]
2928        } else {
2929            ""
2930        };
2931
2932        cells.push(cell_text.to_string());
2933    }
2934
2935    cells
2936}
2937
2938/// Emit a multiline table row with inline parsing (Phase 7.1).
2939///
2940/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2941/// physical line re-emits its container prefix (`  > `) via the window before
2942/// its content. With an empty prefix the tails equal the raw lines, so emission
2943/// is byte-identical to the pre-window path.
2944fn emit_multiline_table_row(
2945    builder: &mut GreenNodeBuilder<'static>,
2946    window: &StrippedLines<'_, '_>,
2947    indices: &[usize],
2948    columns: &[Column],
2949    kind: SyntaxKind,
2950    config: &ParserOptions,
2951) {
2952    if indices.is_empty() {
2953        return;
2954    }
2955
2956    builder.start_node(kind.into());
2957
2958    // Emit the first line's container prefix as tokens, then slice cells from
2959    // the prefix-stripped tail (for CST losslessness, only the first physical
2960    // line is parsed into cells; continuation lines stay verbatim TEXT).
2961    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2962    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2963    let (trimmed, newline_str) = strip_newline(first_line);
2964    let mut current_pos = 0;
2965
2966    for (col_idx, column) in columns.iter().enumerate() {
2967        let cell_text = &cell_contents[col_idx];
2968        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2969        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2970
2971        // Emit whitespace before cell
2972        if current_pos < cell_start {
2973            builder.token(
2974                SyntaxKind::WHITESPACE.into(),
2975                &trimmed[current_pos..cell_start],
2976            );
2977        }
2978
2979        // Emit cell with inline parsing (first line content only)
2980        emit_table_cell(builder, cell_text, config);
2981
2982        current_pos = cell_end;
2983    }
2984
2985    // Emit trailing whitespace
2986    if current_pos < trimmed.len() {
2987        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2988    }
2989
2990    // Emit newline
2991    if !newline_str.is_empty() {
2992        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2993    }
2994
2995    // Emit continuation lines as TEXT to preserve exact line structure,
2996    // re-emitting each line's container prefix first.
2997    for &idx in &indices[1..] {
2998        let tail = window.emit_or_dispatch_tail(builder, idx);
2999        emit_line_tokens(builder, tail);
3000    }
3001
3002    builder.finish_node();
3003}
3004
3005#[cfg(test)]
3006mod multiline_table_tests {
3007    use super::super::container_prefix::ContainerPrefix;
3008    use super::*;
3009    use crate::syntax::SyntaxNode;
3010
3011    #[test]
3012    fn test_multiline_separator_detection() {
3013        assert!(
3014            try_parse_multiline_separator(
3015                "-------------------------------------------------------------"
3016            )
3017            .is_some()
3018        );
3019        assert!(try_parse_multiline_separator("---").is_some());
3020        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces
3021        assert!(try_parse_multiline_separator("--").is_none()); // too short
3022        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
3023        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
3024    }
3025
3026    #[test]
3027    fn test_basic_multiline_table() {
3028        let input = vec![
3029            "-------------------------------------------------------------",
3030            " Centered   Default           Right Left",
3031            "  Header    Aligned         Aligned Aligned",
3032            "----------- ------- --------------- -------------------------",
3033            "   First    row                12.0 Example of a row that",
3034            "                                    spans multiple lines.",
3035            "",
3036            "  Second    row                 5.0 Here's another one.",
3037            "-------------------------------------------------------------",
3038            "",
3039        ];
3040
3041        let mut builder = GreenNodeBuilder::new();
3042        let prefix = ContainerPrefix::default();
3043        let window = StrippedLines::new(&input, 0, &prefix);
3044        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3045
3046        assert!(result.is_some());
3047        assert_eq!(result.unwrap(), 9);
3048    }
3049
3050    #[test]
3051    fn test_multiline_table_headerless() {
3052        let input = vec![
3053            "----------- ------- --------------- -------------------------",
3054            "   First    row                12.0 Example of a row that",
3055            "                                    spans multiple lines.",
3056            "",
3057            "  Second    row                 5.0 Here's another one.",
3058            "----------- ------- --------------- -------------------------",
3059            "",
3060        ];
3061
3062        let mut builder = GreenNodeBuilder::new();
3063        let prefix = ContainerPrefix::default();
3064        let window = StrippedLines::new(&input, 0, &prefix);
3065        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3066
3067        assert!(result.is_some());
3068        assert_eq!(result.unwrap(), 6);
3069    }
3070
3071    #[test]
3072    fn test_multiline_table_headerless_single_line_is_not_multiline() {
3073        let input = vec![
3074            "-------     ------ ----------   -------",
3075            "     12     12        12             12",
3076            "-------     ------ ----------   -------",
3077            "",
3078            "Not part of table.",
3079            "",
3080        ];
3081
3082        let mut builder = GreenNodeBuilder::new();
3083        let prefix = ContainerPrefix::default();
3084        let window = StrippedLines::new(&input, 0, &prefix);
3085        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3086
3087        assert!(result.is_none());
3088    }
3089
3090    #[test]
3091    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
3092        let input = vec![
3093            "----------  ---------  -----------  ---------------------------",
3094            "   First    row               12.0  Example of a row that spans",
3095            "                                    multiple lines.",
3096            "----------  ---------  -----------  ---------------------------",
3097            "",
3098        ];
3099
3100        let mut builder = GreenNodeBuilder::new();
3101        let prefix = ContainerPrefix::default();
3102        let window = StrippedLines::new(&input, 0, &prefix);
3103        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3104
3105        assert!(result.is_some());
3106        assert_eq!(result.unwrap(), 4);
3107    }
3108
3109    #[test]
3110    fn test_multiline_table_with_caption() {
3111        let input = vec![
3112            "-------------------------------------------------------------",
3113            " Col1       Col2",
3114            "----------- -------",
3115            "   A        B",
3116            "",
3117            "-------------------------------------------------------------",
3118            "",
3119            "Table: Here's the caption.",
3120            "",
3121        ];
3122
3123        let mut builder = GreenNodeBuilder::new();
3124        let prefix = ContainerPrefix::default();
3125        let window = StrippedLines::new(&input, 0, &prefix);
3126        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3127
3128        assert!(result.is_some());
3129        // table (6 lines) + blank + caption
3130        assert_eq!(result.unwrap(), 8);
3131    }
3132
3133    #[test]
3134    fn test_multiline_table_single_row() {
3135        let input = vec![
3136            "---------------------------------------------",
3137            " Header1    Header2",
3138            "----------- -----------",
3139            "   Data     More data",
3140            "",
3141            "---------------------------------------------",
3142            "",
3143        ];
3144
3145        let mut builder = GreenNodeBuilder::new();
3146        let prefix = ContainerPrefix::default();
3147        let window = StrippedLines::new(&input, 0, &prefix);
3148        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3149
3150        assert!(result.is_some());
3151        assert_eq!(result.unwrap(), 6);
3152    }
3153
3154    #[test]
3155    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
3156        let input = vec![
3157            "- - - - -",
3158            "Third section with underscores.",
3159            "",
3160            "_____",
3161            "",
3162            "> Quote before rule",
3163            ">",
3164            "> ***",
3165            ">",
3166            "> Quote after rule",
3167            "",
3168            "Final paragraph.",
3169            "",
3170            "Here's a horizontal rule:",
3171            "",
3172            "---",
3173            "Text directly after the horizontal rule.",
3174            "",
3175        ];
3176
3177        let mut builder = GreenNodeBuilder::new();
3178        let prefix = ContainerPrefix::default();
3179        let window = StrippedLines::new(&input, 0, &prefix);
3180        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3181
3182        assert!(result.is_none());
3183    }
3184
3185    #[test]
3186    fn test_not_multiline_table() {
3187        // Simple table should not be parsed as multiline
3188        let input = vec![
3189            "  Right     Left     Center     Default",
3190            "-------     ------ ----------   -------",
3191            "     12     12        12            12",
3192            "",
3193        ];
3194
3195        let mut builder = GreenNodeBuilder::new();
3196        let prefix = ContainerPrefix::default();
3197        let window = StrippedLines::new(&input, 0, &prefix);
3198        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3199
3200        // Should not parse because first line isn't a full-width separator
3201        assert!(result.is_none());
3202    }
3203
3204    // Phase 7.1: Unit tests for emit_table_cell() helper
3205    #[test]
3206    fn test_emit_table_cell_plain_text() {
3207        let mut builder = GreenNodeBuilder::new();
3208        emit_table_cell(&mut builder, "Cell", &ParserOptions::default());
3209        let green = builder.finish();
3210        let node = SyntaxNode::new_root(green);
3211
3212        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3213        assert_eq!(node.text(), "Cell");
3214
3215        // Should have TEXT child
3216        let children: Vec<_> = node.children_with_tokens().collect();
3217        assert_eq!(children.len(), 1);
3218        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
3219    }
3220
3221    #[test]
3222    fn test_emit_table_cell_with_emphasis() {
3223        let mut builder = GreenNodeBuilder::new();
3224        emit_table_cell(&mut builder, "*italic*", &ParserOptions::default());
3225        let green = builder.finish();
3226        let node = SyntaxNode::new_root(green);
3227
3228        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3229        assert_eq!(node.text(), "*italic*");
3230
3231        // Should have EMPHASIS child
3232        let children: Vec<_> = node.children().collect();
3233        assert_eq!(children.len(), 1);
3234        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
3235    }
3236
3237    #[test]
3238    fn test_emit_table_cell_with_code() {
3239        let mut builder = GreenNodeBuilder::new();
3240        emit_table_cell(&mut builder, "`code`", &ParserOptions::default());
3241        let green = builder.finish();
3242        let node = SyntaxNode::new_root(green);
3243
3244        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3245        assert_eq!(node.text(), "`code`");
3246
3247        // Should have CODE_SPAN child
3248        let children: Vec<_> = node.children().collect();
3249        assert_eq!(children.len(), 1);
3250        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
3251    }
3252
3253    #[test]
3254    fn test_emit_table_cell_with_link() {
3255        let mut builder = GreenNodeBuilder::new();
3256        emit_table_cell(&mut builder, "[text](url)", &ParserOptions::default());
3257        let green = builder.finish();
3258        let node = SyntaxNode::new_root(green);
3259
3260        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3261        assert_eq!(node.text(), "[text](url)");
3262
3263        // Should have LINK child
3264        let children: Vec<_> = node.children().collect();
3265        assert_eq!(children.len(), 1);
3266        assert_eq!(children[0].kind(), SyntaxKind::LINK);
3267    }
3268
3269    #[test]
3270    fn test_emit_table_cell_with_strong() {
3271        let mut builder = GreenNodeBuilder::new();
3272        emit_table_cell(&mut builder, "**bold**", &ParserOptions::default());
3273        let green = builder.finish();
3274        let node = SyntaxNode::new_root(green);
3275
3276        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3277        assert_eq!(node.text(), "**bold**");
3278
3279        // Should have STRONG child
3280        let children: Vec<_> = node.children().collect();
3281        assert_eq!(children.len(), 1);
3282        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
3283    }
3284
3285    #[test]
3286    fn test_emit_table_cell_mixed_inline() {
3287        let mut builder = GreenNodeBuilder::new();
3288        emit_table_cell(
3289            &mut builder,
3290            "Text **bold** and `code`",
3291            &ParserOptions::default(),
3292        );
3293        let green = builder.finish();
3294        let node = SyntaxNode::new_root(green);
3295
3296        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3297        assert_eq!(node.text(), "Text **bold** and `code`");
3298
3299        // Should have multiple children: TEXT, STRONG, TEXT, CODE_SPAN
3300        let children: Vec<_> = node.children_with_tokens().collect();
3301        assert!(children.len() >= 4);
3302
3303        // Check some expected types
3304        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
3305        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
3306    }
3307
3308    #[test]
3309    fn test_emit_table_cell_empty() {
3310        let mut builder = GreenNodeBuilder::new();
3311        emit_table_cell(&mut builder, "", &ParserOptions::default());
3312        let green = builder.finish();
3313        let node = SyntaxNode::new_root(green);
3314
3315        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3316        assert_eq!(node.text(), "");
3317
3318        // Empty cell should have no children
3319        let children: Vec<_> = node.children_with_tokens().collect();
3320        assert_eq!(children.len(), 0);
3321    }
3322
3323    #[test]
3324    fn test_emit_table_cell_escaped_pipe() {
3325        let mut builder = GreenNodeBuilder::new();
3326        emit_table_cell(&mut builder, r"A \| B", &ParserOptions::default());
3327        let green = builder.finish();
3328        let node = SyntaxNode::new_root(green);
3329
3330        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3331        // The escaped pipe should be preserved
3332        assert_eq!(node.text(), r"A \| B");
3333    }
3334}