Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Read-only indexed view over lines for table detection scans. Two
/// backings:
///
/// - `[&str]` — a raw, unstripped line buffer, used by callers that scan
///   the source directly (the block dispatcher's caption lookahead, list
///   and definition-list probes).
/// - [`StrippedLines`] / [`UniformStripView`] — a container-prefix-stripped
///   view that strips each line lazily on access via
///   [`StrippedLines::strip_at`]. Detection scans touch only a bounded
///   range (they stop at the first blank line), so this stays
///   O(scanned lines) rather than materializing the whole buffer. The old
///   `strip_all` collected `0..raw.len()` on every call, which was
///   quadratic when table detection runs at every block start inside a
///   large blockquote or list.
///
/// Indices passed to [`LineView::line`] are absolute positions in the
/// underlying buffer; callers in this module bound them with
/// [`LineView::line_count`] before reading.
pub(crate) trait LineView {
    /// The line at absolute index `i`.
    fn line(&self, i: usize) -> &str;
    /// Total number of lines (absolute upper bound for indices).
    fn line_count(&self) -> usize;
}
36
/// Raw backing: lines are returned exactly as stored, with no container
/// prefix removed.
impl LineView for [&str] {
    /// Returns the `i`-th stored line; plain slice indexing, so an
    /// out-of-range `i` panics.
    fn line(&self, i: usize) -> &str {
        self[i]
    }
    /// Number of lines in the slice.
    fn line_count(&self) -> usize {
        self.len()
    }
}
45
/// Stripped backing: each access delegates to [`StrippedLines::strip_at`],
/// so only the lines a scan actually reads are stripped.
impl<'a, 'p> LineView for StrippedLines<'a, 'p> {
    /// The line at absolute index `i`, as returned by `strip_at`.
    fn line(&self, i: usize) -> &str {
        self.strip_at(i)
    }
    /// Length of the underlying raw line buffer.
    fn line_count(&self) -> usize {
        self.raw().len()
    }
}
54
/// A [`LineView`] over a [`StrippedLines`] window that strips *every* line —
/// including the dispatch line — with the full container strip rather than
/// the emission-safe line-0 strip. Grid-border detection needs this: a
/// `+---+` border sitting at column 0 of a list item's inner content must
/// not retain the list indent, or the strict column-0 check in
/// `try_parse_grid_separator` would reject it. Emission still goes through
/// the window, which preserves the indent bytes. This reproduces the old
/// grid path's `stripped[dispatch] = prefix.strip(...)` override, but
/// lazily.
pub(crate) struct UniformStripView<'s, 'a, 'p>(&'s StrippedLines<'a, 'p>);

impl<'s, 'a, 'p> LineView for UniformStripView<'s, 'a, 'p> {
    /// Applies the window's container prefix strip directly to raw line `i`,
    /// bypassing `strip_at` so the dispatch line gets no special treatment.
    fn line(&self, i: usize) -> &str {
        self.0.prefix().strip(self.0.raw()[i])
    }
    /// Length of the wrapped window's raw line buffer.
    fn line_count(&self) -> usize {
        self.0.raw().len()
    }
}
74
/// Horizontal alignment of a table column.
///
/// For simple tables the value is derived in `determine_alignments` from how
/// the header text sits relative to the dashes of the separator line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Header text is flush with the left edge of the dashes only.
    Left,
    /// Header text is flush with the right edge of the dashes only.
    Right,
    /// Header text is flush with neither edge of the dashes.
    Center,
    /// Header text is flush with both edges, the header cell is empty, or
    /// the table has no header line.
    Default,
}
82
/// Column information extracted from the separator line.
///
/// `start`/`end` delimit one run of dashes. They are byte indices into the
/// separator line (leading indentation included); `determine_alignments`
/// re-interprets them as character offsets when slicing a header line.
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index, inclusive) of the dash run in the line
    start: usize,
    /// End position (byte index, exclusive) of the dash run in the line
    end: usize,
    /// Column alignment
    alignment: Alignment,
}
93
94/// Try to detect if a line is a table separator line.
95/// Returns Some(column positions) if it's a valid separator.
96pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
97    let trimmed = line.trim_start();
98    // Strip trailing newline if present (CRLF or LF)
99    let (trimmed, newline_str) = strip_newline(trimmed);
100    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
101
102    // Must have leading spaces <= 3 to not be a code block
103    if leading_spaces > 3 {
104        return None;
105    }
106
107    // Simple tables only use dashed separators.
108    if trimmed.contains('*') || trimmed.contains('_') {
109        return None;
110    }
111
112    // Must contain at least one dash
113    if !trimmed.contains('-') {
114        return None;
115    }
116
117    // A separator line consists of dashes and spaces
118    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
119        return None;
120    }
121
122    // Must not be a horizontal rule.
123    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
124    if dash_groups.len() <= 1 {
125        return None;
126    }
127
128    // Extract column positions from dash groups
129    let columns = extract_columns(trimmed, leading_spaces);
130
131    if columns.is_empty() {
132        return None;
133    }
134
135    Some(columns)
136}
137
138/// Extract column positions from a separator line.
139fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
140    let mut columns = Vec::new();
141    let mut in_dashes = false;
142    let mut col_start = 0;
143
144    for (i, ch) in separator.char_indices() {
145        match ch {
146            '-' if !in_dashes => {
147                col_start = i + offset;
148                in_dashes = true;
149            }
150            ' ' if in_dashes => {
151                columns.push(Column {
152                    start: col_start,
153                    end: i + offset,
154                    alignment: Alignment::Default, // Will be determined later
155                });
156                in_dashes = false;
157            }
158            _ => {}
159        }
160    }
161
162    // Handle last column
163    if in_dashes {
164        columns.push(Column {
165            start: col_start,
166            end: separator.len() + offset,
167            alignment: Alignment::Default,
168        });
169    }
170
171    columns
172}
173
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Column boundaries are taken from ASCII separator lines, where character
/// and byte offsets coincide, but header and data rows may hold multibyte
/// characters, so offsets have to be translated before slicing. An offset at
/// or past the end of `line` maps to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    // The byte index of character `offset` is the total encoded length of
    // the characters before it; a short line simply sums to `line.len()`.
    line.chars().take(offset).map(char::len_utf8).sum()
}
184
/// Split a table caption marker off the front of `line`.
///
/// Accepts `Table:`, `table:` or a bare `:` after at most three characters of
/// indentation. Returns `Some((prefix_len, caption_text))`, where
/// `prefix_len` counts the indentation plus the marker and `caption_text` is
/// everything after the marker; returns `None` if `line` is not a caption.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();

    // Deeper indentation belongs to a code block.
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare `:` only counts when whitespace follows it (Pandoc-style), so
    // constructs such as fenced div fences (`:::`) are not taken as captions.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
213
/// Check if a line could be the start of a table caption.
///
/// True exactly when `try_parse_caption_prefix` recognizes a `Table:`,
/// `table:` or bare `:` marker on `line`.
fn is_table_caption_start(line: &str) -> bool {
    try_parse_caption_prefix(line).is_some()
}
218
/// Check whether `line`, ignoring leading whitespace, begins with exactly one
/// colon — i.e. a `:` that is not immediately followed by another `:`.
fn is_bare_colon_caption_start(line: &str) -> bool {
    match line.trim_start().strip_prefix(':') {
        Some(after_colon) => !after_colon.starts_with(':'),
        None => false,
    }
}
223
224fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
225    let Some((_, rest)) = try_parse_caption_prefix(line) else {
226        return false;
227    };
228    let trimmed = rest.trim_start();
229    trimmed.starts_with("```") || trimmed.starts_with("~~~")
230}
231
/// Check whether `line` is a fenced-div fence: optional leading whitespace,
/// at least three colons, then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let unindented = line.trim_start();
    let after_colons = unindented.trim_start_matches(':');
    // `:` is a single byte, so the length difference is the colon count.
    let colon_count = unindented.len() - after_colons.len();

    colon_count >= 3
        && (after_colons.is_empty() || after_colons.starts_with(char::is_whitespace))
}
241
242fn is_valid_caption_start_before_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
243    if !is_table_caption_start(lines.line(pos)) {
244        return false;
245    }
246
247    if is_bare_colon_caption_start(lines.line(pos))
248        && bare_colon_caption_looks_like_definition_code_block(lines.line(pos))
249    {
250        return false;
251    }
252
253    // Avoid stealing definition-list definitions (":   ...") as table captions.
254    if is_bare_colon_caption_start(lines.line(pos))
255        && pos > 0
256        && !lines.line(pos - 1).trim().is_empty()
257        && !line_is_fenced_div_fence(lines.line(pos - 1))
258    {
259        return false;
260    }
261    true
262}
263
/// Check if a line could be the start of a grid table.
/// Grid tables start with a separator line like +---+---+ or +===+===+
///
/// True exactly when `try_parse_grid_separator` accepts `line`.
fn is_grid_table_start(line: &str) -> bool {
    try_parse_grid_separator(line).is_some()
}
269
/// Check if a line could be the start of a multiline table.
/// Multiline tables start with either:
/// - A full-width dash separator (----)
/// - A column separator with dashes and spaces (---- ---- ----)
///
/// The first form is tested with `try_parse_multiline_separator`, the second
/// with `is_column_separator`.
fn is_multiline_table_start(line: &str) -> bool {
    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
}
277
278/// Check if there's a table following a potential caption at this position.
279/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
280pub(crate) fn is_caption_followed_by_table(
281    lines: &(impl LineView + ?Sized),
282    caption_pos: usize,
283) -> bool {
284    if caption_pos >= lines.line_count() {
285        return false;
286    }
287
288    // Caption must start with a caption prefix
289    if !is_valid_caption_start_before_table(lines, caption_pos) {
290        return false;
291    }
292
293    let mut pos = caption_pos + 1;
294
295    // Skip continuation lines of caption (non-blank lines).
296    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
297    // must not be folded into the caption.
298    while pos < lines.line_count()
299        && !lines.line(pos).trim().is_empty()
300        && !line_is_fenced_div_fence(lines.line(pos))
301    {
302        // If we hit a table separator, we found a table
303        if try_parse_table_separator(lines.line(pos)).is_some() {
304            return true;
305        }
306        pos += 1;
307    }
308
309    // Skip one blank line
310    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
311        pos += 1;
312    }
313
314    // Check for a table grid at the next position.
315    table_grid_starts_at(lines, pos)
316}
317
318/// Cheap lookahead: does any table kind's grid begin at absolute line `pos`?
319///
320/// This is the lightweight twin of the block dispatcher's `first_kind_at`,
321/// which answers the same "is there a table here?" question by attempting a
322/// full parse of each kind in turn. We deliberately do **not** call that from
323/// the caption lookahead: caption detection runs at every block start, and a
324/// full per-kind parse there would reintroduce the O(n²) blowup the bounded
325/// separator probe exists to avoid. To keep the two predicates in agreement,
326/// this calls the same primitive separator detectors the real parsers gate on
327/// (`is_grid_table_start` → `try_parse_grid_separator`, `is_multiline_table_start`
328/// → `try_parse_multiline_separator`/`is_column_separator`,
329/// `try_parse_table_separator`, `try_parse_pipe_separator`).
330fn table_grid_starts_at(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
331    if pos >= lines.line_count() {
332        return false;
333    }
334    let line = lines.line(pos);
335
336    // Grid table start (`+---+---+` or `+===+===+`).
337    if is_grid_table_start(line) {
338        return true;
339    }
340
341    // Multiline table start (`----` or `---- ---- ----`).
342    if is_multiline_table_start(line) {
343        return true;
344    }
345
346    // Separator line (simple/pipe table, headerless).
347    if try_parse_table_separator(line).is_some() {
348        return true;
349    }
350
351    // Header line followed by a separator (simple/pipe table with header).
352    if pos + 1 < lines.line_count() && !line.trim().is_empty() {
353        let next_line = lines.line(pos + 1);
354        if try_parse_table_separator(next_line).is_some()
355            || try_parse_pipe_separator(next_line).is_some()
356        {
357            return true;
358        }
359    }
360
361    false
362}
363
364fn caption_range_starting_at(
365    lines: &(impl LineView + ?Sized),
366    start: usize,
367) -> Option<(usize, usize)> {
368    if start >= lines.line_count() || !is_table_caption_start(lines.line(start)) {
369        return None;
370    }
371    let mut end = start + 1;
372    while end < lines.line_count()
373        && !lines.line(end).trim().is_empty()
374        && !line_is_fenced_div_fence(lines.line(end))
375    {
376        end += 1;
377    }
378    Some((start, end))
379}
380
381/// Find caption before table (if any).
382/// Returns (caption_start, caption_end) positions, or None.
383fn find_caption_before_table(
384    lines: &(impl LineView + ?Sized),
385    table_start: usize,
386) -> Option<(usize, usize)> {
387    if table_start == 0 {
388        return None;
389    }
390
391    // Look backward for a caption
392    // Caption must be immediately before table (with possible blank line between)
393    let mut pos = table_start - 1;
394
395    // Skip one blank line if present
396    if lines.line(pos).trim().is_empty() {
397        if pos == 0 {
398            return None;
399        }
400        pos -= 1;
401    }
402
403    // Now pos points to the last non-blank line before the table
404    // This could be the last line of a multiline caption, or a single-line caption
405    let caption_end = pos + 1; // End is exclusive
406
407    // If this line is NOT a caption start, it might be a continuation line
408    // Scan backward through non-blank lines to find the caption start
409    if !is_valid_caption_start_before_table(lines, pos) {
410        // Not a caption start - check if there's a caption start above
411        let mut scan_pos = pos;
412        while scan_pos > 0 {
413            scan_pos -= 1;
414            let line = lines.line(scan_pos);
415
416            // If we hit a blank line or fenced-div fence, we've gone too far
417            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
418                return None;
419            }
420
421            // If we find a caption start, this is the beginning of the multiline caption
422            if is_valid_caption_start_before_table(lines, scan_pos) {
423                if scan_pos > 0 && !lines.line(scan_pos - 1).trim().is_empty() {
424                    return None;
425                }
426                if previous_nonblank_looks_like_table(lines, scan_pos) {
427                    return None;
428                }
429                return Some((scan_pos, caption_end));
430            }
431        }
432        // Scanned to beginning without finding caption start
433        None
434    } else {
435        if pos > 0 && !lines.line(pos - 1).trim().is_empty() {
436            return None;
437        }
438        if previous_nonblank_looks_like_table(lines, pos) {
439            return None;
440        }
441        // This line is a caption start - return the range
442        Some((pos, caption_end))
443    }
444}
445
446fn previous_nonblank_looks_like_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
447    if pos == 0 {
448        return false;
449    }
450    let mut i = pos;
451    while i > 0 {
452        i -= 1;
453        let line = lines.line(i).trim();
454        if line.is_empty() {
455            continue;
456        }
457        return line_looks_like_table_syntax(line);
458    }
459    false
460}
461
462fn line_looks_like_table_syntax(line: &str) -> bool {
463    if line.starts_with('|') && line.matches('|').count() >= 2 {
464        return true;
465    }
466    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
467        return true;
468    }
469    try_parse_table_separator(line).is_some()
470        || try_parse_pipe_separator(line).is_some()
471        || try_parse_grid_separator(line).is_some()
472}
473
474/// Find caption after table (if any).
475/// Returns (caption_start, caption_end) positions, or None.
476fn find_caption_after_table(
477    lines: &(impl LineView + ?Sized),
478    table_end: usize,
479) -> Option<(usize, usize)> {
480    if table_end >= lines.line_count() {
481        return None;
482    }
483
484    let mut pos = table_end;
485
486    // Skip one blank line if present
487    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
488        pos += 1;
489    }
490
491    if pos >= lines.line_count() {
492        return None;
493    }
494
495    // Check if this line is a caption
496    if is_table_caption_start(lines.line(pos)) {
497        let caption_start = pos;
498        // Find end of caption (continues until blank line or fenced-div fence)
499        let mut caption_end = caption_start + 1;
500        while caption_end < lines.line_count()
501            && !lines.line(caption_end).trim().is_empty()
502            && !line_is_fenced_div_fence(lines.line(caption_end))
503        {
504            caption_end += 1;
505        }
506        Some((caption_start, caption_end))
507    } else {
508        None
509    }
510}
511
512/// Emit a table caption node.
513/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
514/// the text ends with a balanced `{...}` block, lift it into a structural
515/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
516/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
517/// the id).
518fn emit_caption_line_text(
519    builder: &mut GreenNodeBuilder<'static>,
520    text_with_newline: &str,
521    config: &ParserOptions,
522    lift_trailing_attrs: bool,
523) {
524    let (text, newline_str) = strip_newline(text_with_newline);
525
526    if lift_trailing_attrs
527        && !text.is_empty()
528        && let Some((_attrs, before_attrs, start_brace_pos)) =
529            try_parse_trailing_attributes_with_pos(text)
530    {
531        let trimmed_len = text.trim_end().len();
532        let space = &text[before_attrs.len()..start_brace_pos];
533        let raw_attrs = &text[start_brace_pos..trimmed_len];
534        let trailing_ws = &text[trimmed_len..];
535
536        if !before_attrs.is_empty() {
537            inline_emission::emit_inlines(builder, before_attrs, config, false);
538        }
539        if !space.is_empty() {
540            builder.token(SyntaxKind::WHITESPACE.into(), space);
541        }
542        emit_attribute_node(builder, raw_attrs);
543        if !trailing_ws.is_empty() {
544            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
545        }
546        if !newline_str.is_empty() {
547            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
548        }
549        return;
550    }
551
552    if !text.is_empty() {
553        inline_emission::emit_inlines(builder, text, config, false);
554    }
555    if !newline_str.is_empty() {
556        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
557    }
558}
559
560/// Emit the blank (container-only) lines in the absolute range `[from, to)` as
561/// `BLANK_LINE` nodes. Re-emits each line's container prefix as tokens via the
562/// window, so a `>`-only blank line between a caption and its table inside a
563/// blockquote round-trips losslessly. Mirrors the interior blank-row emitter in
564/// `try_parse_multiline_table`. An empty range emits nothing.
565fn emit_caption_blank_lines(
566    builder: &mut GreenNodeBuilder<'static>,
567    window: &StrippedLines<'_, '_>,
568    from: usize,
569    to: usize,
570) {
571    for abs in from..to {
572        // `window.line` is the container-stripped view, so a `>`-only line reads
573        // as blank.
574        if window.line(abs).trim().is_empty() {
575            builder.start_node(SyntaxKind::BLANK_LINE.into());
576            let tail = window.emit_or_dispatch_tail(builder, abs);
577            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
578            builder.finish_node();
579        }
580    }
581}
582
583fn emit_table_caption(
584    builder: &mut GreenNodeBuilder<'static>,
585    window: &StrippedLines<'_, '_>,
586    start: usize,
587    end: usize,
588    config: &ParserOptions,
589) {
590    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
591
592    let last_idx = (end - start).saturating_sub(1);
593
594    for (i, abs) in (start..end).enumerate() {
595        let lift_attrs = i == last_idx;
596
597        // Re-emit this caption line's container prefix (`>`/whitespace) as
598        // tokens — except the dispatch line, whose prefix the core already
599        // emitted — and operate on the stripped `tail`, so the caption prefix
600        // (`Table:`/`:`) is recognized inside a blockquote or list rather than
601        // swallowed into the caption text (which doubled the marker and broke
602        // losslessness).
603        let tail = window.emit_or_dispatch_tail(builder, abs);
604
605        if i == 0 {
606            // First line - parse and emit prefix separately
607            let trimmed = tail.trim_start();
608            let leading_ws_len = tail.len() - trimmed.len();
609
610            // Emit leading whitespace if present
611            if leading_ws_len > 0 {
612                builder.token(SyntaxKind::WHITESPACE.into(), &tail[..leading_ws_len]);
613            }
614
615            // Check for caption prefix and emit separately
616            // Calculate where the prefix ends (after trimmed content)
617            let prefix_and_rest = if tail.ends_with('\n') {
618                &tail[leading_ws_len..tail.len() - 1] // Exclude newline
619            } else {
620                &tail[leading_ws_len..]
621            };
622
623            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
624                (7, "Table: ")
625            } else if prefix_and_rest.starts_with("table: ") {
626                (7, "table: ")
627            } else if prefix_and_rest.starts_with(": ") {
628                (2, ": ")
629            } else if prefix_and_rest.starts_with(':') {
630                (1, ":")
631            } else {
632                (0, "")
633            };
634
635            if prefix_len > 0 {
636                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
637
638                // Emit rest of line after prefix
639                let rest_start = leading_ws_len + prefix_len;
640                if rest_start < tail.len() {
641                    emit_caption_line_text(builder, &tail[rest_start..], config, lift_attrs);
642                }
643            } else {
644                // No recognized prefix, emit whole trimmed line
645                emit_caption_line_text(builder, &tail[leading_ws_len..], config, lift_attrs);
646            }
647        } else {
648            // Continuation lines - emit with inline parsing (attrs only on last line).
649            emit_caption_line_text(builder, tail, config, lift_attrs);
650        }
651    }
652
653    builder.finish_node(); // TABLE_CAPTION
654}
655
/// Emit a table cell with inline content parsing.
/// This is the core helper for Phase 7.1 table inline parsing migration.
///
/// Always emits a `TABLE_CELL` node; an empty `cell_text` yields a node with
/// no children.
fn emit_table_cell(
    builder: &mut GreenNodeBuilder<'static>,
    cell_text: &str,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::TABLE_CELL.into());

    // Parse inline content within the cell
    if !cell_text.is_empty() {
        inline_emission::emit_inlines(builder, cell_text, config, false);
    }

    builder.finish_node(); // TABLE_CELL
}
672
673/// Determine column alignments based on separator and optional header.
674fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
675    for col in columns.iter_mut() {
676        let sep_slice = &separator_line[col.start..col.end];
677
678        if let Some(header) = header_line {
679            let header_start = column_offset_to_byte_index(header, col.start);
680            let header_end = column_offset_to_byte_index(header, col.end);
681
682            // Extract header text for this column
683            let header_text = if header_start < header_end {
684                header[header_start..header_end].trim()
685            } else if header_start < header.len() {
686                header[header_start..].trim()
687            } else {
688                ""
689            };
690
691            if header_text.is_empty() {
692                col.alignment = Alignment::Default;
693                continue;
694            }
695
696            // Find where the header text starts and ends within the column
697            let header_in_col = &header[header_start..header_end];
698            let text_start = header_in_col.len() - header_in_col.trim_start().len();
699            let text_end = header_in_col.trim_end().len() + text_start;
700
701            // Check dash alignment relative to text
702            let dashes_start = 0; // Dashes start at beginning of sep_slice
703            let dashes_end = sep_slice.len();
704
705            let flush_left = dashes_start == text_start;
706            let flush_right = dashes_end == text_end;
707
708            col.alignment = match (flush_left, flush_right) {
709                (true, true) => Alignment::Default,
710                (true, false) => Alignment::Left,
711                (false, true) => Alignment::Right,
712                (false, false) => Alignment::Center,
713            };
714        } else {
715            // Without header, alignment based on first row (we'll handle this later)
716            col.alignment = Alignment::Default;
717        }
718    }
719}
720
/// Try to parse a simple table starting at the window's current position.
///
/// On success the table — optional caption, optional header row, separator,
/// data rows — is emitted into `builder` as a single `SIMPLE_TABLE` node and
/// the number of lines consumed (captions included) is returned.
///
/// Returns `None`, before anything is emitted, when the position is out of
/// range, when neither the dispatch line nor the line after it is a
/// separator, or when the separator is followed by no data rows.
pub(crate) fn try_parse_simple_table(
    window: &StrippedLines<'_, '_>,
    builder: &mut GreenNodeBuilder<'static>,
    config: &ParserOptions,
) -> Option<usize> {
    // `lines` is only used for bounds checks; content is read through `window`.
    let lines = window.raw();
    let start_pos = window.pos();
    log::trace!("try_parse_simple_table at line {}", start_pos + 1);

    if start_pos >= lines.len() {
        return None;
    }

    // Cheap gate: a simple table's separator must sit on the dispatch line or
    // the line just after it (see `find_separator_line`). Table detection
    // runs at every block start, so peek just those one or two lines via
    // `strip_at` and bail right away for the prose/math paragraphs that can't
    // be a table.
    let gate_first = window.strip_at(start_pos);
    let separator_here = try_parse_table_separator(gate_first).is_some();
    let separator_next = !separator_here
        && start_pos + 1 < lines.len()
        && !gate_first.trim().is_empty()
        && try_parse_table_separator(window.strip_at(start_pos + 1)).is_some();
    if !separator_here && !separator_next {
        return None;
    }

    // Detection scans read the container-prefix-stripped view lazily through
    // the window (see `LineView`): a table nested in `list → blockquote`
    // (e.g. `- >  a   b`) has its `  > ` prefix removed before the
    // separator/column-shape checks. With an empty prefix the stripped view
    // equals the raw lines. Scans stop at the first blank line, so only a
    // bounded range is ever stripped. Emission re-emits the prefix bytes as
    // tokens via the window.

    // Look for a separator line
    let separator_pos = find_separator_line(window, start_pos)?;
    log::trace!("  found separator at line {}", separator_pos + 1);

    let separator_line = window.line(separator_pos);
    let mut columns = try_parse_table_separator(separator_line)?;

    // Determine if there's a header (separator not at start)
    let has_header = separator_pos > start_pos;
    let header_line = if has_header {
        Some(window.line(separator_pos - 1))
    } else {
        None
    };

    // Determine alignments
    determine_alignments(&mut columns, separator_line, header_line);

    // Find table end (blank line or end of input)
    let end_pos = find_table_end(window, separator_pos + 1);

    // Must have at least one data row (or it's just a separator).
    // `find_table_end` never returns less than `separator_pos + 1`, so the
    // subtraction cannot underflow.
    let data_rows = end_pos - separator_pos - 1;

    if data_rows == 0 {
        return None;
    }

    // Check for caption before table
    let caption_before = find_caption_before_table(window, start_pos);

    // Check for caption after table; a caption above takes precedence, so the
    // lines below are only examined when none was found.
    let caption_after = if caption_before.is_some() {
        None
    } else {
        find_caption_after_table(window, end_pos)
    };

    // Build the table
    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());

    // Emit caption before if present
    if let Some((cap_start, cap_end)) = caption_before {
        emit_table_caption(builder, window, cap_start, cap_end, config);
        // Emit blank line between caption and table if present
        emit_caption_blank_lines(builder, window, cap_end, start_pos);
    }

    // Emit header if present. On the dispatch line the core already emitted
    // the container prefix; only continuation rows re-emit it (via the window
    // inside `emit_table_row`).
    if has_header {
        emit_table_row(
            builder,
            window,
            separator_pos - 1,
            &columns,
            SyntaxKind::TABLE_HEADER,
            config,
        );
    }

    // Emit separator, re-emitting any continuation-line container prefix
    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
    emit_line_tokens(builder, separator_tail);
    builder.finish_node();

    // Emit data rows (always continuation lines)
    for idx in (separator_pos + 1)..end_pos {
        emit_table_row(
            builder,
            window,
            idx,
            &columns,
            SyntaxKind::TABLE_ROW,
            config,
        );
    }

    // Emit caption after if present
    if let Some((cap_start, cap_end)) = caption_after {
        // Emit blank line before caption if needed
        emit_caption_blank_lines(builder, window, end_pos, cap_start);
        emit_table_caption(builder, window, cap_start, cap_end, config);
    }

    builder.finish_node(); // SimpleTable

    // Calculate lines consumed (including captions): from the first emitted
    // line (caption above, else header, else separator) ...
    let table_start = if let Some((cap_start, _)) = caption_before {
        cap_start
    } else if has_header {
        separator_pos - 1
    } else {
        separator_pos
    };

    // ... to the end of the caption below, or of the data rows.
    let table_end = if let Some((_, cap_end)) = caption_after {
        cap_end
    } else {
        end_pos
    };

    let lines_consumed = table_end - table_start;

    Some(lines_consumed)
}
869
870/// Find the position of a separator line starting from pos.
871fn find_separator_line(lines: &(impl LineView + ?Sized), start_pos: usize) -> Option<usize> {
872    log::trace!("  find_separator_line from line {}", start_pos + 1);
873
874    // Check first line
875    log::trace!("    checking first line: {:?}", lines.line(start_pos));
876    if try_parse_table_separator(lines.line(start_pos)).is_some() {
877        log::trace!("    separator found at first line");
878        return Some(start_pos);
879    }
880
881    // Check second line (for table with header)
882    if start_pos + 1 < lines.line_count()
883        && !lines.line(start_pos).trim().is_empty()
884        && try_parse_table_separator(lines.line(start_pos + 1)).is_some()
885    {
886        return Some(start_pos + 1);
887    }
888    None
889}
890
891/// Find where the table ends (first blank line or end of input).
892fn find_table_end(lines: &(impl LineView + ?Sized), start_pos: usize) -> usize {
893    for i in start_pos..lines.line_count() {
894        if lines.line(i).trim().is_empty() {
895            return i;
896        }
897        // Check if this could be a closing separator
898        if try_parse_table_separator(lines.line(i)).is_some() {
899            // Check if next line is blank or end
900            if i + 1 >= lines.line_count() || lines.line(i + 1).trim().is_empty() {
901                return i + 1;
902            }
903        }
904    }
905    lines.line_count()
906}
907
908/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
909/// Uses column boundaries from the separator line to extract cells.
910fn emit_table_row(
911    builder: &mut GreenNodeBuilder<'static>,
912    window: &StrippedLines<'_, '_>,
913    abs_idx: usize,
914    columns: &[Column],
915    row_kind: SyntaxKind,
916    config: &ParserOptions,
917) {
918    builder.start_node(row_kind.into());
919
920    // On continuation lines the leading `  > ` prefix is re-emitted as
921    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
922    // stripped tail returned; the dispatch line just strips its (already
923    // core-emitted) prefix. Empty prefix ⇒ the raw line.
924    let line = window.emit_or_dispatch_tail(builder, abs_idx);
925
926    let (line_without_newline, newline_str) = strip_newline(line);
927
928    // Emit leading whitespace if present
929    let trimmed = line_without_newline.trim_start();
930    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
931    if leading_ws_len > 0 {
932        builder.token(
933            SyntaxKind::WHITESPACE.into(),
934            &line_without_newline[..leading_ws_len],
935        );
936    }
937
938    // Track where we are in the line (for losslessness)
939    let mut current_pos = 0;
940
941    // Extract and emit cells based on column boundaries
942    for col in columns.iter() {
943        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
944        let cell_start = if col.start >= leading_ws_len {
945            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
946        } else {
947            0
948        };
949
950        let cell_end = if col.end >= leading_ws_len {
951            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
952        } else {
953            0
954        };
955
956        // Extract cell text from column bounds. When the column lies entirely
957        // before the trimmed content (col.end <= leading_ws_len) both bounds
958        // clamp to 0; treat that as an empty cell rather than re-emitting the
959        // whole row.
960        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
961            &trimmed[cell_start..cell_end]
962        } else {
963            ""
964        };
965
966        let cell_content = cell_text.trim();
967        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
968
969        // Emit any whitespace from current position to start of cell content
970        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
971        if current_pos < content_abs_pos {
972            builder.token(
973                SyntaxKind::WHITESPACE.into(),
974                &trimmed[current_pos..content_abs_pos],
975            );
976        }
977
978        // Emit cell with inline parsing
979        emit_table_cell(builder, cell_content, config);
980
981        // Update current position to end of cell content
982        current_pos = content_abs_pos + cell_content.len();
983    }
984
985    // Emit any remaining whitespace after last cell
986    if current_pos < trimmed.len() {
987        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
988    }
989
990    // Emit newline if present
991    if !newline_str.is_empty() {
992        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
993    }
994
995    builder.finish_node();
996}
997
998// ============================================================================
999// Pipe Table Parsing
1000// ============================================================================
1001
1002/// Check if a line is a pipe table separator line.
1003/// Returns the column alignments if it's a valid separator.
1004fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
1005    let trimmed = line.trim();
1006
1007    // Must contain at least one pipe
1008    if !trimmed.contains('|') && !trimmed.contains('+') {
1009        return None;
1010    }
1011
1012    // Split by pipes (or + for orgtbl variant)
1013    let cells: Vec<&str> = if trimmed.contains('+') {
1014        // Orgtbl variant: use + as separator in separator line
1015        trimmed.split(['|', '+']).collect()
1016    } else {
1017        trimmed.split('|').collect()
1018    };
1019
1020    let mut alignments = Vec::new();
1021
1022    for cell in cells {
1023        let cell = cell.trim();
1024
1025        // Skip empty cells (from leading/trailing pipes)
1026        if cell.is_empty() {
1027            continue;
1028        }
1029
1030        // Must be dashes with optional colons
1031        let starts_colon = cell.starts_with(':');
1032        let ends_colon = cell.ends_with(':');
1033
1034        // Remove colons to check if rest is all dashes
1035        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
1036
1037        // Must have at least one dash
1038        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
1039            return None;
1040        }
1041
1042        // Determine alignment from colon positions
1043        let alignment = match (starts_colon, ends_colon) {
1044            (true, true) => Alignment::Center,
1045            (true, false) => Alignment::Left,
1046            (false, true) => Alignment::Right,
1047            (false, false) => Alignment::Default,
1048        };
1049
1050        alignments.push(alignment);
1051    }
1052
1053    // Must have at least one column
1054    if alignments.is_empty() {
1055        None
1056    } else {
1057        Some(alignments)
1058    }
1059}
1060
/// Split a pipe table row into trimmed cell strings.
///
/// A single leading `|` is dropped, and a trailing `|` does not produce an
/// extra empty cell. A backslash escapes exactly the next character, which
/// is copied into the cell verbatim together with the backslash: `\|` stays
/// inside the cell, while `\\|` is an escaped backslash followed by a real
/// cell delimiter. This matches the escape handling used when the row is
/// emitted.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // The leading pipe (if any) only opens the row; it is not a delimiter.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and consume whatever it escapes so the
                // escaped character can never act as a delimiter or start a
                // new escape.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                // End current cell, start new one.
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // Add the last cell only if it is non-empty (it is empty when the line
    // ended with a pipe).
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
1108
1109/// Emit a pipe table row with inline-parsed cells.
1110/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
1111fn emit_pipe_table_row(
1112    builder: &mut GreenNodeBuilder<'static>,
1113    window: &StrippedLines<'_, '_>,
1114    abs_idx: usize,
1115    row_kind: SyntaxKind,
1116    config: &ParserOptions,
1117) {
1118    builder.start_node(row_kind.into());
1119
1120    // On continuation lines (separator/data rows under a list+blockquote
1121    // container) the leading `  > ` prefix is not consumed by the core;
1122    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1123    // and returns the stripped tail. On the dispatch line the core already
1124    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1125    // With an empty prefix (non-nested tables) both are no-ops returning
1126    // the raw line.
1127    let line = if abs_idx == window.dispatch_pos() {
1128        window.dispatch_tail()
1129    } else {
1130        window.emit_prefix_at(builder, abs_idx)
1131    };
1132
1133    let (line_without_newline, newline_str) = strip_newline(line);
1134    let trimmed = line_without_newline.trim();
1135
1136    // Parse cell boundaries
1137    let mut cell_starts = Vec::new();
1138    let mut cell_ends = Vec::new();
1139    let mut in_escape = false;
1140
1141    // Find all pipe positions (excluding escaped ones)
1142    let mut pipe_positions = Vec::new();
1143    for (i, ch) in trimmed.char_indices() {
1144        if in_escape {
1145            in_escape = false;
1146            continue;
1147        }
1148        if ch == '\\' {
1149            in_escape = true;
1150            continue;
1151        }
1152        if ch == '|' {
1153            pipe_positions.push(i);
1154        }
1155    }
1156
1157    // Determine cell boundaries based on pipe positions
1158    if pipe_positions.is_empty() {
1159        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1160        cell_starts.push(0);
1161        cell_ends.push(trimmed.len());
1162    } else {
1163        // Check if line starts with pipe
1164        let start_pipe = pipe_positions.first() == Some(&0);
1165        // Check if line ends with pipe
1166        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1167
1168        if start_pipe {
1169            // Skip first pipe
1170            for i in 1..pipe_positions.len() {
1171                cell_starts.push(pipe_positions[i - 1] + 1);
1172                cell_ends.push(pipe_positions[i]);
1173            }
1174            // Add last cell if there's no trailing pipe
1175            if !end_pipe {
1176                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1177                cell_ends.push(trimmed.len());
1178            }
1179        } else {
1180            // No leading pipe
1181            cell_starts.push(0);
1182            cell_ends.push(pipe_positions[0]);
1183
1184            for i in 1..pipe_positions.len() {
1185                cell_starts.push(pipe_positions[i - 1] + 1);
1186                cell_ends.push(pipe_positions[i]);
1187            }
1188
1189            // Add last cell if there's no trailing pipe
1190            if !end_pipe {
1191                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1192                cell_ends.push(trimmed.len());
1193            }
1194        }
1195    }
1196
1197    // Emit leading whitespace if present (before trim)
1198    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1199    if leading_ws_len > 0 {
1200        builder.token(
1201            SyntaxKind::WHITESPACE.into(),
1202            &line_without_newline[..leading_ws_len],
1203        );
1204    }
1205
1206    // Emit cells with pipes
1207    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1208        // Emit pipe before cell (except for first cell if no leading pipe)
1209        if *start > 0 {
1210            builder.token(SyntaxKind::TEXT.into(), "|");
1211        } else if idx == 0 && trimmed.starts_with('|') {
1212            // Leading pipe
1213            builder.token(SyntaxKind::TEXT.into(), "|");
1214        }
1215
1216        // Get cell content with its whitespace
1217        let cell_with_ws = &trimmed[*start..*end];
1218        let cell_content = cell_with_ws.trim();
1219
1220        // Emit leading whitespace within cell
1221        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1222        if !cell_leading_ws.is_empty() {
1223            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1224        }
1225
1226        // Emit cell with inline parsing
1227        emit_table_cell(builder, cell_content, config);
1228
1229        // Emit trailing whitespace within cell
1230        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1231        if cell_trailing_ws_start < cell_with_ws.len() {
1232            builder.token(
1233                SyntaxKind::WHITESPACE.into(),
1234                &cell_with_ws[cell_trailing_ws_start..],
1235            );
1236        }
1237    }
1238
1239    // Emit trailing pipe if present
1240    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1241        builder.token(SyntaxKind::TEXT.into(), "|");
1242    }
1243
1244    // Emit trailing whitespace after trim (before newline)
1245    let trailing_ws_start = leading_ws_len + trimmed.len();
1246    if trailing_ws_start < line_without_newline.len() {
1247        builder.token(
1248            SyntaxKind::WHITESPACE.into(),
1249            &line_without_newline[trailing_ws_start..],
1250        );
1251    }
1252
1253    // Emit newline
1254    if !newline_str.is_empty() {
1255        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1256    }
1257
1258    builder.finish_node();
1259}
1260
1261/// Try to parse a pipe table starting at the given position.
1262/// Returns the number of lines consumed if successful.
1263pub(crate) fn try_parse_pipe_table(
1264    window: &StrippedLines<'_, '_>,
1265    builder: &mut GreenNodeBuilder<'static>,
1266    config: &ParserOptions,
1267) -> Option<usize> {
1268    let lines = window.raw();
1269    let start_pos = window.pos();
1270    if start_pos + 1 >= lines.len() {
1271        return None;
1272    }
1273
1274    // Cheap gate: a pipe table's first line must contain a `|` (it is either
1275    // the header or, headerless, the delimiter row), unless this is a
1276    // caption-led table. Table detection runs at every block start, so doing
1277    // any per-line work for every prose/math paragraph was quadratic on large
1278    // documents. Peek the dispatch line and run the (bounded) caption probe on
1279    // the same stripped `window` the detection below uses, so the gate applies
1280    // inside containers (blockquote/list) too — not just at top level.
1281    if !window.strip_at(start_pos).contains('|') && !is_caption_followed_by_table(window, start_pos)
1282    {
1283        return None;
1284    }
1285
1286    // Detection scans read the container-prefix-stripped view lazily through
1287    // the window (see `LineView`), so a table nested in `list → blockquote`
1288    // (e.g. `- > | a | b |`) has its `  > ` prefix removed before the
1289    // separator/cell shape checks. The dispatch line uses the emission-safe
1290    // line-0 strip (its prefix was consumed by the core); every other line
1291    // gets the full continuation strip. Scans stop at the first blank line, so
1292    // only a bounded range is stripped. Emission still reads raw `lines` so the
1293    // prefix bytes can be re-emitted as tokens.
1294
1295    // Check if this line is a caption followed by a table
1296    // If so, the actual table starts after the caption and blank line
1297    let (actual_start, caption_before) = if is_caption_followed_by_table(window, start_pos) {
1298        let (cap_start, cap_end) = caption_range_starting_at(window, start_pos)?;
1299        let mut pos = cap_end;
1300        while pos < window.line_count() && window.line(pos).trim().is_empty() {
1301            pos += 1;
1302        }
1303        (pos, Some((cap_start, cap_end)))
1304    } else {
1305        (start_pos, None)
1306    };
1307
1308    if actual_start + 1 >= lines.len() {
1309        return None;
1310    }
1311
1312    // First line should have pipes (potential header)
1313    if !window.line(actual_start).contains('|') {
1314        return None;
1315    }
1316
1317    // Second line should be separator
1318    let alignments = try_parse_pipe_separator(window.line(actual_start + 1))?;
1319
1320    // Parse header cells
1321    let header_cells = parse_pipe_table_row(window.line(actual_start));
1322
1323    // Number of columns should match (approximately - be lenient)
1324    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1325        // Only fail if very different
1326        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1327            return None;
1328        }
1329    }
1330
1331    // Find table end (first blank line or end of input)
1332    let mut end_pos = actual_start + 2;
1333    while end_pos < window.line_count() {
1334        let line = window.line(end_pos);
1335        if line.trim().is_empty() {
1336            break;
1337        }
1338        // Row should have pipes
1339        if !line.contains('|') {
1340            break;
1341        }
1342        end_pos += 1;
1343    }
1344
1345    // Must have at least one data row
1346    if end_pos <= actual_start + 2 {
1347        return None;
1348    }
1349
1350    // Check for caption before table (only if we didn't already detect it)
1351    let caption_before = caption_before.or_else(|| find_caption_before_table(window, actual_start));
1352
1353    // Check for caption after table
1354    let caption_after = if caption_before.is_some() {
1355        None
1356    } else {
1357        find_caption_after_table(window, end_pos)
1358    };
1359
1360    // Build the pipe table
1361    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1362
1363    // Emit caption before if present
1364    if let Some((cap_start, cap_end)) = caption_before {
1365        emit_table_caption(builder, window, cap_start, cap_end, config);
1366        // Emit blank line between caption and table if present
1367        emit_caption_blank_lines(builder, window, cap_end, actual_start);
1368    }
1369
1370    // Emit header row with inline-parsed cells. On the dispatch line the
1371    // core already emitted the container prefix; only when the header is a
1372    // continuation line (e.g. it follows a caption-before line) do we emit
1373    // the prefix here.
1374    emit_pipe_table_row(
1375        builder,
1376        window,
1377        actual_start,
1378        SyntaxKind::TABLE_HEADER,
1379        config,
1380    );
1381
1382    // Emit separator, re-emitting any continuation-line container prefix
1383    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1384    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1385    let sep_idx = actual_start + 1;
1386    let separator_tail = if sep_idx == window.dispatch_pos() {
1387        window.dispatch_tail()
1388    } else {
1389        window.emit_prefix_at(builder, sep_idx)
1390    };
1391    emit_line_tokens(builder, separator_tail);
1392    builder.finish_node();
1393
1394    // Emit data rows with inline-parsed cells (always continuation lines)
1395    for idx in (actual_start + 2)..end_pos {
1396        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1397    }
1398
1399    // Emit caption after if present
1400    if let Some((cap_start, cap_end)) = caption_after {
1401        // Emit blank line before caption if needed
1402        emit_caption_blank_lines(builder, window, end_pos, cap_start);
1403        emit_table_caption(builder, window, cap_start, cap_end, config);
1404    }
1405
1406    builder.finish_node(); // PipeTable
1407
1408    // Calculate lines consumed
1409    let table_start = caption_before
1410        .map(|(start, _)| start)
1411        .unwrap_or(actual_start);
1412    let table_end = if let Some((_, cap_end)) = caption_after {
1413        cap_end
1414    } else {
1415        end_pos
1416    };
1417
1418    Some(table_end - table_start)
1419}
1420
1421#[cfg(test)]
1422mod tests {
1423    use super::super::container_prefix::ContainerPrefix;
1424    use super::*;
1425
1426    #[test]
1427    fn test_separator_detection() {
1428        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
1429        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
1430        assert!(try_parse_table_separator("-------").is_none()); // horizontal rule
1431        assert!(try_parse_table_separator("--- --- ---").is_some()); // table separator
1432    }
1433
1434    #[test]
1435    fn test_column_extraction() {
1436        let line = "-------     ------ ----------   -------";
1437        let columns = extract_columns(line, 0);
1438        assert_eq!(columns.len(), 4);
1439    }
1440
1441    #[test]
1442    fn test_simple_table_with_header() {
1443        let input = vec![
1444            "  Right     Left     Center     Default",
1445            "-------     ------ ----------   -------",
1446            "     12     12        12            12",
1447            "    123     123       123          123",
1448            "",
1449        ];
1450
1451        let mut builder = GreenNodeBuilder::new();
1452        let prefix = ContainerPrefix::default();
1453        let window = StrippedLines::new(&input, 0, &prefix);
1454        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1455
1456        assert!(result.is_some());
1457        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1458    }
1459
1460    #[test]
1461    fn test_headerless_table() {
1462        let input = vec![
1463            "-------     ------ ----------   -------",
1464            "     12     12        12            12",
1465            "    123     123       123          123",
1466            "",
1467        ];
1468
1469        let mut builder = GreenNodeBuilder::new();
1470        let prefix = ContainerPrefix::default();
1471        let window = StrippedLines::new(&input, 0, &prefix);
1472        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1473
1474        assert!(result.is_some());
1475        assert_eq!(result.unwrap(), 3); // sep + 2 rows
1476    }
1477
1478    #[test]
1479    fn test_caption_prefix_detection() {
1480        assert!(try_parse_caption_prefix("Table: My caption").is_some());
1481        assert!(try_parse_caption_prefix("table: My caption").is_some());
1482        assert!(try_parse_caption_prefix(": My caption").is_some());
1483        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
1484        assert!(try_parse_caption_prefix("Not a caption").is_none());
1485    }
1486
1487    #[test]
1488    fn table_grid_starts_at_matches_each_kind() {
1489        // Positives — one shape per table kind the real parsers accept.
1490        assert!(table_grid_starts_at(&["+---+---+"][..], 0)); // grid
1491        assert!(table_grid_starts_at(&["----------- -------"][..], 0)); // multiline
1492        assert!(table_grid_starts_at(&["--- --- ---"][..], 0)); // simple, headerless
1493        assert!(table_grid_starts_at(&["A | B", "| --- | --- |"][..], 0)); // pipe, header + sep
1494        assert!(table_grid_starts_at(&["A    B", "--- ---"][..], 0)); // simple, header + sep
1495        // A lone dash run is a multiline full-width separator under Pandoc (not a
1496        // thematic break), so the lookahead intentionally accepts it; the full
1497        // parser then rejects it if no rows follow.
1498        assert!(table_grid_starts_at(&["-------"][..], 0));
1499
1500        // Negatives — shapes that must not read as a table start.
1501        assert!(!table_grid_starts_at(&["just some prose"][..], 0));
1502        assert!(!table_grid_starts_at(&["# Heading"][..], 0));
1503        assert!(!table_grid_starts_at(&["```", "code", "```"][..], 0)); // code fence
1504        assert!(!table_grid_starts_at(&["only one line"][..], 1)); // out of range
1505    }
1506
1507    /// The cheap caption lookahead must agree with what the full parser does:
1508    /// when it says a table follows the caption, a table node really forms; when
1509    /// it says no table follows, none does. This guards against the lookahead
1510    /// (`table_grid_starts_at`) drifting from the real per-kind parsers.
1511    #[test]
1512    fn caption_lookahead_agrees_with_real_parse() {
1513        let with_table = ": Cap\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
1514        let lines: Vec<&str> = with_table.lines().collect();
1515        assert!(is_caption_followed_by_table(&lines[..], 0));
1516        assert!(format!("{:#?}", crate::parse(with_table, None)).contains("PIPE_TABLE"));
1517
1518        let no_table = ": Cap\n\nplain paragraph\n";
1519        let lines: Vec<&str> = no_table.lines().collect();
1520        assert!(!is_caption_followed_by_table(&lines[..], 0));
1521        assert!(!format!("{:#?}", crate::parse(no_table, None)).contains("TABLE"));
1522    }
1523
1524    #[test]
1525    fn bare_colon_fenced_code_is_not_table_caption() {
1526        let input = "Term\n: ```\n  code\n  ```\n";
1527        let tree = crate::parse(input, None);
1528
1529        assert!(
1530            tree.descendants()
1531                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1532            "should parse as definition list"
1533        );
1534        assert!(
1535            tree.descendants()
1536                .any(|node| node.kind() == SyntaxKind::CODE_BLOCK),
1537            "definition should preserve fenced code block"
1538        );
1539        assert!(
1540            !tree
1541                .descendants()
1542                .any(|node| node.kind() == SyntaxKind::TABLE_CAPTION),
1543            "fenced code definition should not be parsed as table caption"
1544        );
1545    }
1546
1547    #[test]
1548    fn bare_colon_caption_after_div_opening_is_table_caption() {
1549        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
1550        let tree = crate::parse(input, None);
1551
1552        let caption_count = tree
1553            .descendants()
1554            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
1555            .count();
1556        assert_eq!(
1557            caption_count, 2,
1558            "expected both captions to attach to tables"
1559        );
1560        assert!(
1561            !tree
1562                .descendants()
1563                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1564            "caption lines in this fenced div table layout should not parse as definition list"
1565        );
1566    }
1567
1568    #[test]
1569    fn test_table_with_caption_after() {
1570        let input = vec![
1571            "  Right     Left     Center     Default",
1572            "-------     ------ ----------   -------",
1573            "     12     12        12            12",
1574            "    123     123       123          123",
1575            "",
1576            "Table: Demonstration of simple table syntax.",
1577            "",
1578        ];
1579
1580        let mut builder = GreenNodeBuilder::new();
1581        let prefix = ContainerPrefix::default();
1582        let window = StrippedLines::new(&input, 0, &prefix);
1583        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1584
1585        assert!(result.is_some());
1586        // Should consume: header + sep + 2 rows + blank + caption
1587        assert_eq!(result.unwrap(), 6);
1588    }
1589
1590    #[test]
1591    fn test_table_with_caption_before() {
1592        let input = vec![
1593            "Table: Demonstration of simple table syntax.",
1594            "",
1595            "  Right     Left     Center     Default",
1596            "-------     ------ ----------   -------",
1597            "     12     12        12            12",
1598            "    123     123       123          123",
1599            "",
1600        ];
1601
1602        let mut builder = GreenNodeBuilder::new();
1603        let prefix = ContainerPrefix::default();
1604        let window = StrippedLines::new(&input, 2, &prefix);
1605        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1606
1607        assert!(result.is_some());
1608        // Should consume: caption + blank + header + sep + 2 rows
1609        assert_eq!(result.unwrap(), 6);
1610    }
1611
1612    #[test]
1613    fn test_caption_with_colon_prefix() {
1614        let input = vec![
1615            "  Right     Left",
1616            "-------     ------",
1617            "     12     12",
1618            "",
1619            ": Short caption",
1620            "",
1621        ];
1622
1623        let mut builder = GreenNodeBuilder::new();
1624        let prefix = ContainerPrefix::default();
1625        let window = StrippedLines::new(&input, 0, &prefix);
1626        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1627
1628        assert!(result.is_some());
1629        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1630    }
1631
1632    #[test]
1633    fn test_multiline_caption() {
1634        let input = vec![
1635            "  Right     Left",
1636            "-------     ------",
1637            "     12     12",
1638            "",
1639            "Table: This is a longer caption",
1640            "that spans multiple lines.",
1641            "",
1642        ];
1643
1644        let mut builder = GreenNodeBuilder::new();
1645        let prefix = ContainerPrefix::default();
1646        let window = StrippedLines::new(&input, 0, &prefix);
1647        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1648
1649        assert!(result.is_some());
1650        // Should consume through end of multi-line caption
1651        assert_eq!(result.unwrap(), 6);
1652    }
1653
#[test]
fn test_simple_table_with_multibyte_cell_content() {
    // The `Hue` cells contain the multi-byte `°` character.
    let source_lines = vec![
        "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
        "--------------  ------------ ------- ---------------- ----------------- ------------",
        "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
        "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
        "",
    ];

    let options = ParserOptions::default();
    let no_prefix = ContainerPrefix::default();
    let mut tree = GreenNodeBuilder::new();
    let lines = StrippedLines::new(&source_lines, 0, &no_prefix);

    // Expected consumption: header + sep + 2 rows.
    let consumed = try_parse_simple_table(&lines, &mut tree, &options);
    assert_eq!(consumed, Some(4));
}
1672
1673    // Pipe table tests
#[test]
fn test_pipe_separator_detection() {
    // Every entry here must be recognised as a pipe table separator.
    let accepted = [
        "|------:|:-----|---------|:------:|",
        "|---|---|",
        "-----|-----:",    // No leading pipe
        "|-----+-------|", // Orgtbl variant
    ];
    for candidate in accepted {
        assert!(
            try_parse_pipe_separator(candidate).is_some(),
            "expected a pipe separator: {candidate:?}"
        );
    }

    // Plain prose is rejected.
    assert!(try_parse_pipe_separator("not a separator").is_none());
}
1682
#[test]
fn test_pipe_alignments() {
    // Colon placement in each separator segment selects the column alignment.
    let parsed = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
    let expected = [
        Alignment::Right,
        Alignment::Left,
        Alignment::Default,
        Alignment::Center,
    ];

    assert_eq!(parsed.len(), expected.len());
    for (column, want) in expected.iter().enumerate() {
        assert_eq!(parsed[column], *want);
    }
}
1692
#[test]
fn test_parse_pipe_table_row() {
    // Row wrapped in leading/trailing pipes: three cells with trimmed text.
    let with_edges = parse_pipe_table_row("| Right | Left | Center |");
    assert_eq!(with_edges.len(), 3);
    for (idx, expected) in ["Right", "Left", "Center"].into_iter().enumerate() {
        assert_eq!(with_edges[idx], expected);
    }

    // Without leading/trailing pipes the cell count is the same.
    let without_edges = parse_pipe_table_row("Right | Left | Center");
    assert_eq!(without_edges.len(), 3);
}
1705
#[test]
fn test_basic_pipe_table() {
    // The table starts at line 1, after a leading blank line.
    let source_lines = vec![
        "",
        "| Right | Left | Center |",
        "|------:|:-----|:------:|",
        "|   12  |  12  |   12   |",
        "|  123  |  123 |  123   |",
        "",
    ];

    let options = ParserOptions::default();
    let no_prefix = ContainerPrefix::default();
    let mut tree = GreenNodeBuilder::new();
    let lines = StrippedLines::new(&source_lines, 1, &no_prefix);

    // Expected consumption: header + sep + 2 rows.
    let consumed = try_parse_pipe_table(&lines, &mut tree, &options);
    assert_eq!(consumed, Some(4));
}
1725
#[test]
fn test_pipe_table_no_edge_pipes() {
    // Rows and separator carry no leading or trailing pipe.
    let source_lines = vec![
        "",
        "fruit| price",
        "-----|-----:",
        "apple|2.05",
        "pear|1.37",
        "",
    ];

    let options = ParserOptions::default();
    let no_prefix = ContainerPrefix::default();
    let mut tree = GreenNodeBuilder::new();
    let lines = StrippedLines::new(&source_lines, 1, &no_prefix);

    let consumed = try_parse_pipe_table(&lines, &mut tree, &options);
    assert_eq!(consumed, Some(4));
}
1745
#[test]
fn test_pipe_table_with_caption() {
    // A `Table:` caption follows the pipe table after one blank line.
    let source_lines = vec![
        "",
        "| Col1 | Col2 |",
        "|------|------|",
        "| A    | B    |",
        "",
        "Table: My pipe table",
        "",
    ];

    let options = ParserOptions::default();
    let no_prefix = ContainerPrefix::default();
    let mut tree = GreenNodeBuilder::new();
    let lines = StrippedLines::new(&source_lines, 1, &no_prefix);

    // Expected consumption: header + sep + row + blank + caption.
    let consumed = try_parse_pipe_table(&lines, &mut tree, &options);
    assert_eq!(consumed, Some(5));
}
1766
#[test]
fn test_pipe_table_with_multiline_caption_before() {
    // A two-line `: ` caption precedes the table, separated by a blank line.
    let source_lines = vec![
        ": (#tab:base) base R quoting",
        "functions",
        "",
        "| C | D |",
        "|---|---|",
        "| 3 | 4 |",
        "",
    ];

    let options = ParserOptions::default();
    let no_prefix = ContainerPrefix::default();
    let mut tree = GreenNodeBuilder::new();
    let lines = StrippedLines::new(&source_lines, 0, &no_prefix);

    // Expected consumption: caption(2) + blank(1) + header + sep + row.
    let consumed = try_parse_pipe_table(&lines, &mut tree, &options);
    assert_eq!(consumed, Some(6));
}
1788}
1789
1790// ============================================================================
1791// Grid Table Parsing
1792// ============================================================================
1793
/// Recognise a grid table border line such as `+---+---+` or `+===+===+`.
///
/// The line must start at column 0 with `+` and (ignoring trailing
/// whitespace) end with `+`. Every non-empty segment between two `+` signs
/// must consist of a single repeated fill character, `-` or `=`, optionally
/// wrapped in `:` alignment markers. Empty segments (`++`) are skipped.
///
/// Returns one [`GridColumn`] per non-empty segment, or `None` when the line
/// is not a valid border or yields no columns.
fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
    // A grid border must begin at column 0 of its container content. Detection
    // runs on the container-prefix-stripped line (see `try_parse_grid_table`),
    // so any remaining leading whitespace means the border is indented relative
    // to its container -- pandoc parses that as a paragraph, not a grid table.
    if line.trim_start().len() != line.len() {
        return None;
    }

    let border = line.trim_end();
    if !(border.starts_with('+') && border.ends_with('+')) {
        return None;
    }

    // Splitting on `+` yields an empty piece before the first `+` and another
    // after the last one; only the pieces in between describe columns.
    let segments: Vec<&str> = border.split('+').collect();
    if segments.len() < 3 {
        return None;
    }

    let columns = segments[1..segments.len() - 1]
        .iter()
        .filter(|segment| !segment.is_empty())
        .map(|segment| {
            // Strip the alignment colons to get at the fill characters.
            let fill = segment.trim_matches(':');
            let is_header_separator = match fill.chars().next()? {
                '-' => false,
                '=' => true,
                _ => return None,
            };
            let fill_char = if is_header_separator { '=' } else { '-' };

            // The whole fill must repeat the same character.
            fill.chars().all(|c| c == fill_char).then(|| GridColumn {
                is_header_separator,
                // Width counts the alignment colons as well.
                width: segment.chars().count(),
            })
        })
        .collect::<Option<Vec<_>>>()?;

    if columns.is_empty() {
        None
    } else {
        Some(columns)
    }
}

/// Column information for grid tables, derived from one border segment.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the segment is filled with `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters between the two enclosing `+` signs.
    width: usize,
}
1871
1872fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1873    let mut end_byte = start_byte;
1874    let mut display_cols = 0usize;
1875
1876    for (offset, ch) in line[start_byte..].char_indices() {
1877        if ch == '|' {
1878            let sep_byte = start_byte + offset;
1879            return (sep_byte, sep_byte + 1);
1880        }
1881        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1882        if display_cols + ch_width > width {
1883            break;
1884        }
1885        display_cols += ch_width;
1886        end_byte = start_byte + offset + ch.len_utf8();
1887        if display_cols >= width {
1888            break;
1889        }
1890    }
1891
1892    // If the width budget is exhausted before seeing a separator (for example
1893    // because of padding/layout drift), advance to the next literal separator
1894    // to keep row slicing aligned and preserve losslessness.
1895    let mut sep_byte = end_byte;
1896    while sep_byte < line.len() {
1897        let mut chars = line[sep_byte..].chars();
1898        let Some(ch) = chars.next() else {
1899            break;
1900        };
1901        if ch == '|' {
1902            return (sep_byte, sep_byte + 1);
1903        }
1904        sep_byte += ch.len_utf8();
1905    }
1906
1907    (end_byte, end_byte)
1908}
1909
/// Report whether `line` looks like a grid table content row.
///
/// After at most three bytes of leading whitespace the line must start with
/// `|` and, ignoring trailing whitespace, end with either `|` (a normal row)
/// or `+` (a spanning-style continuation line).
fn is_grid_content_row(line: &str) -> bool {
    let indent = line.len() - line.trim_start().len();
    if indent > 3 {
        return false;
    }

    let row = line.trim();
    row.starts_with('|') && row.ends_with(['|', '+'])
}
1923
1924/// Extract cell contents from a single grid table row line.
1925/// Returns a vector of cell contents (trimmed) based on column boundaries.
1926/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1927fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
1928    let (line_content, _) = strip_newline(line);
1929    let line_trimmed = line_content.trim();
1930
1931    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
1932        return vec![String::new(); _columns.len()];
1933    }
1934
1935    let mut cells = Vec::with_capacity(_columns.len());
1936    let mut pos_byte = 1; // Skip leading pipe
1937
1938    for col in _columns {
1939        let col_idx = cells.len();
1940        if pos_byte >= line_trimmed.len() {
1941            cells.push(String::new());
1942            continue;
1943        }
1944
1945        let start_byte = pos_byte;
1946        let end_byte = if col_idx + 1 == _columns.len() {
1947            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
1948        } else {
1949            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
1950            pos_byte = next_start;
1951            end
1952        };
1953        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
1954        if col_idx + 1 == _columns.len() {
1955            pos_byte = line_trimmed.len();
1956        }
1957    }
1958
1959    cells
1960}
1961
1962/// Emit a grid table row with inline-parsed cells.
1963/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
1964/// then continuation lines as raw TEXT for losslessness.
1965fn emit_grid_table_row(
1966    builder: &mut GreenNodeBuilder<'static>,
1967    window: &StrippedLines<'_, '_>,
1968    indices: &[usize],
1969    columns: &[GridColumn],
1970    row_kind: SyntaxKind,
1971    config: &ParserOptions,
1972) {
1973    if indices.is_empty() {
1974        return;
1975    }
1976
1977    builder.start_node(row_kind.into());
1978
1979    // Emit first line with TABLE_CELL nodes. The continuation-line container
1980    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1981    // inside the row node before the cell text; the returned tail is the
1982    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
1983    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
1984    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
1985    let cell_contents = extract_grid_cells_from_line(first_line, columns);
1986    let (line_without_newline, newline_str) = strip_newline(first_line);
1987    let trimmed = line_without_newline.trim();
1988    let expected_pipe_count = columns.len().saturating_add(1);
1989    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
1990
1991    // Rows that don't contain all expected column separators (spanning-style rows)
1992    // must be emitted verbatim for losslessness. The first line's prefix was
1993    // already consumed above; emit its tail and each continuation tail.
1994    if actual_pipe_count != expected_pipe_count {
1995        emit_line_tokens(builder, first_line);
1996        for &idx in &indices[1..] {
1997            let tail = window.emit_or_dispatch_tail(builder, idx);
1998            emit_line_tokens(builder, tail);
1999        }
2000        builder.finish_node();
2001        return;
2002    }
2003
2004    // Emit leading whitespace
2005    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
2006    if leading_ws_len > 0 {
2007        builder.token(
2008            SyntaxKind::WHITESPACE.into(),
2009            &line_without_newline[..leading_ws_len],
2010        );
2011    }
2012
2013    // Emit leading pipe
2014    if trimmed.starts_with('|') {
2015        builder.token(SyntaxKind::TEXT.into(), "|");
2016    }
2017
2018    // Emit each cell based on fixed column widths from separators
2019    let mut pos_byte = 1usize; // after leading pipe
2020    for (idx, cell_content) in cell_contents.iter().enumerate() {
2021        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
2022            let start_byte = pos_byte;
2023            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
2024                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2025            } else {
2026                let (end, next_start) =
2027                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
2028                pos_byte = next_start;
2029                end
2030            };
2031            let slice = &trimmed[start_byte..end_byte];
2032            if idx + 1 == columns.len() {
2033                pos_byte = trimmed.len();
2034            }
2035            slice
2036        } else {
2037            ""
2038        };
2039
2040        // Emit leading whitespace in cell
2041        let cell_trimmed = part.trim();
2042        let ws_start_len = part.len() - part.trim_start().len();
2043        if ws_start_len > 0 {
2044            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
2045        }
2046
2047        // Emit TABLE_CELL with inline parsing
2048        emit_table_cell(builder, cell_content, config);
2049
2050        // Emit trailing whitespace in cell
2051        let ws_end_start = ws_start_len + cell_trimmed.len();
2052        if ws_end_start < part.len() {
2053            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
2054        }
2055
2056        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
2057        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
2058            builder.token(SyntaxKind::TEXT.into(), "|");
2059        }
2060    }
2061
2062    // Emit trailing whitespace before newline
2063    let trailing_ws_start = leading_ws_len + trimmed.len();
2064    if trailing_ws_start < line_without_newline.len() {
2065        builder.token(
2066            SyntaxKind::WHITESPACE.into(),
2067            &line_without_newline[trailing_ws_start..],
2068        );
2069    }
2070
2071    // Emit newline
2072    if !newline_str.is_empty() {
2073        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2074    }
2075
2076    // Emit continuation lines as TEXT for losslessness, re-emitting each
2077    // line's container prefix first.
2078    for &idx in &indices[1..] {
2079        let tail = window.emit_or_dispatch_tail(builder, idx);
2080        emit_line_tokens(builder, tail);
2081    }
2082
2083    builder.finish_node();
2084}
2085
2086/// Try to parse a grid table starting at the given position.
2087/// Returns the number of lines consumed if successful.
2088pub(crate) fn try_parse_grid_table(
2089    window: &StrippedLines<'_, '_>,
2090    builder: &mut GreenNodeBuilder<'static>,
2091    config: &ParserOptions,
2092) -> Option<usize> {
2093    let lines = window.raw();
2094    let start_pos = window.pos();
2095    if start_pos >= lines.len() {
2096        return None;
2097    }
2098
2099    // Grid-border detection reads the stripped view through `UniformStripView`,
2100    // which strips *every* line — including the dispatch line — with the full
2101    // container strip. The strict column-0 check in `try_parse_grid_separator`
2102    // would otherwise reject a `+---+` border sitting at column 0 of a list
2103    // item's inner content if the dispatch line kept its list-indent. With an
2104    // empty prefix the stripped view equals the raw lines. Emission still goes
2105    // through `window.emit_or_dispatch_tail`, which preserves the indent bytes.
2106    // Scans stop at the first blank line, so only a bounded range is stripped.
2107    let view = UniformStripView(window);
2108
2109    // Cheap gate: a grid table's first line is a grid separator (`+---+`/`+===+`),
2110    // unless this is a caption-led table. Table detection runs at every block
2111    // start, so any per-line work for every prose/math paragraph was quadratic
2112    // on large documents. Run the gate on the same `view` the detection uses, so
2113    // it applies inside containers (blockquote/list) too — not just at top level.
2114    if try_parse_grid_separator(view.line(start_pos)).is_none()
2115        && !is_caption_followed_by_table(&view, start_pos)
2116    {
2117        return None;
2118    }
2119
2120    // Check if this line is a caption followed by a table
2121    // If so, the actual table starts after the caption and blank line
2122    let (actual_start, caption_before) = if is_caption_followed_by_table(&view, start_pos) {
2123        let (cap_start, cap_end) = caption_range_starting_at(&view, start_pos)?;
2124        let mut pos = cap_end;
2125        while pos < view.line_count() && view.line(pos).trim().is_empty() {
2126            pos += 1;
2127        }
2128        (pos, Some((cap_start, cap_end)))
2129    } else {
2130        (start_pos, None)
2131    };
2132
2133    if actual_start >= lines.len() {
2134        return None;
2135    }
2136
2137    // First line must be a grid separator
2138    let first_line = view.line(actual_start);
2139    let _columns = try_parse_grid_separator(first_line)?;
2140
2141    // Track table structure
2142    let mut end_pos = actual_start + 1;
2143    let mut found_header_sep = false;
2144    let mut in_footer = false;
2145
2146    // Scan table lines
2147    while end_pos < lines.len() {
2148        let line = view.line(end_pos);
2149
2150        // Check for blank line (table ends)
2151        if line.trim().is_empty() {
2152            break;
2153        }
2154
2155        // Check for separator line
2156        if let Some(sep_cols) = try_parse_grid_separator(line) {
2157            // Check if this is a header separator (=)
2158            if sep_cols.iter().any(|c| c.is_header_separator) {
2159                if !found_header_sep {
2160                    found_header_sep = true;
2161                } else if !in_footer {
2162                    // Second = separator starts footer
2163                    in_footer = true;
2164                }
2165            }
2166            end_pos += 1;
2167            continue;
2168        }
2169
2170        // Check for content row
2171        if is_grid_content_row(line) {
2172            end_pos += 1;
2173            continue;
2174        }
2175
2176        // Not a valid grid table line - table ends
2177        break;
2178    }
2179
2180    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2181    // Or just top + content rows that end with a separator
2182    if end_pos <= actual_start + 1 {
2183        return None;
2184    }
2185
2186    // Last consumed line should be a separator for a well-formed table
2187    // But we'll be lenient and accept tables ending with content rows
2188
2189    // Check for caption before table (only if we didn't already detected it)
2190    let caption_before = caption_before.or_else(|| find_caption_before_table(&view, actual_start));
2191
2192    // Check for caption after table
2193    let caption_after = if caption_before.is_some() {
2194        None
2195    } else {
2196        find_caption_after_table(&view, end_pos)
2197    };
2198
2199    // Build the grid table
2200    builder.start_node(SyntaxKind::GRID_TABLE.into());
2201
2202    // Emit caption before if present
2203    if let Some((cap_start, cap_end)) = caption_before {
2204        emit_table_caption(builder, window, cap_start, cap_end, config);
2205        // Emit blank line between caption and table if present
2206        emit_caption_blank_lines(builder, window, cap_end, actual_start);
2207    }
2208
2209    // Track whether we've passed the header separator
2210    let mut past_header_sep = false;
2211    let mut in_footer_section = false;
2212    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2213    // each line's container prefix can be re-emitted via the window.
2214    let mut current_row_indices: Vec<usize> = Vec::new();
2215    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2216
2217    // Emit table rows - accumulate multi-line cells
2218    for idx in actual_start..end_pos {
2219        let line = view.line(idx);
2220        if let Some(sep_cols) = try_parse_grid_separator(line) {
2221            // Separator line - emit any accumulated row first
2222            if !current_row_indices.is_empty() {
2223                emit_grid_table_row(
2224                    builder,
2225                    window,
2226                    &current_row_indices,
2227                    &sep_cols,
2228                    current_row_kind,
2229                    config,
2230                );
2231                current_row_indices.clear();
2232            }
2233
2234            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2235
2236            // Re-emit any continuation-line container prefix (`  > `) as
2237            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2238            if is_header_sep {
2239                if !past_header_sep {
2240                    // This is the header/body separator
2241                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2242                    let tail = window.emit_or_dispatch_tail(builder, idx);
2243                    emit_line_tokens(builder, tail);
2244                    builder.finish_node();
2245                    past_header_sep = true;
2246                } else {
2247                    // Footer separator
2248                    if !in_footer_section {
2249                        in_footer_section = true;
2250                    }
2251                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2252                    let tail = window.emit_or_dispatch_tail(builder, idx);
2253                    emit_line_tokens(builder, tail);
2254                    builder.finish_node();
2255                }
2256            } else {
2257                // Regular separator (row boundary)
2258                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2259                let tail = window.emit_or_dispatch_tail(builder, idx);
2260                emit_line_tokens(builder, tail);
2261                builder.finish_node();
2262            }
2263        } else if is_grid_content_row(line) {
2264            // Content row - accumulate for multi-line cells
2265            current_row_kind = if !past_header_sep && found_header_sep {
2266                SyntaxKind::TABLE_HEADER
2267            } else if in_footer_section {
2268                SyntaxKind::TABLE_FOOTER
2269            } else {
2270                SyntaxKind::TABLE_ROW
2271            };
2272
2273            current_row_indices.push(idx);
2274        }
2275    }
2276
2277    // Emit any remaining accumulated row
2278    if !current_row_indices.is_empty() {
2279        // Use first separator's columns for cell boundaries
2280        if let Some(sep_cols) = try_parse_grid_separator(view.line(actual_start)) {
2281            emit_grid_table_row(
2282                builder,
2283                window,
2284                &current_row_indices,
2285                &sep_cols,
2286                current_row_kind,
2287                config,
2288            );
2289        }
2290    }
2291
2292    // Emit caption after if present
2293    if let Some((cap_start, cap_end)) = caption_after {
2294        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2295        emit_table_caption(builder, window, cap_start, cap_end, config);
2296    }
2297
2298    builder.finish_node(); // GRID_TABLE
2299
2300    // Calculate lines consumed
2301    let table_start = caption_before
2302        .map(|(start, _)| start)
2303        .unwrap_or(actual_start);
2304    let table_end = if let Some((_, cap_end)) = caption_after {
2305        cap_end
2306    } else {
2307        end_pos
2308    };
2309
2310    Some(table_end - table_start)
2311}
2312
#[cfg(test)]
mod grid_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;

    /// Run `try_parse_grid_table` on `input` with an empty container prefix,
    /// dispatching at line `start`, and return its result.
    fn consumed_lines(input: Vec<&str>, start: usize) -> Option<usize> {
        let options = ParserOptions::default();
        let prefix = ContainerPrefix::default();
        let mut builder = GreenNodeBuilder::new();
        let window = StrippedLines::new(&input, start, &prefix);
        try_parse_grid_table(&window, &mut builder, &options)
    }

    #[test]
    fn test_grid_separator_detection() {
        let accepted = [
            "+---+---+",
            "+===+===+",
            "+---------------+---------------+",
            "+:---:+", // center aligned
        ];
        for border in accepted {
            assert!(
                try_parse_grid_separator(border).is_some(),
                "expected a grid separator: {border:?}"
            );
        }

        assert!(try_parse_grid_separator("not a separator").is_none());
        assert!(try_parse_grid_separator("|---|---|").is_none()); // pipe table sep

        // A grid border must sit at column 0 of its container content; an
        // indented border is not a grid table (matches pandoc, which parses
        // an indented `+---+` as a paragraph). Detection runs on the
        // container-stripped line, so any remaining leading space disqualifies.
        for indented in [" +---+---+", "  +---+---+", "   +===+===+"] {
            assert!(
                try_parse_grid_separator(indented).is_none(),
                "indented border must be rejected: {indented:?}"
            );
        }
    }

    #[test]
    fn test_grid_header_separator() {
        let header_cols = try_parse_grid_separator("+===+===+").unwrap();
        assert!(header_cols.iter().all(|c| c.is_header_separator));

        let row_cols = try_parse_grid_separator("+---+---+").unwrap();
        assert!(!row_cols.iter().any(|c| c.is_header_separator));
    }

    #[test]
    fn test_grid_content_row_detection() {
        for row in ["| content | content |", "|  |  |", "| content +------+"] {
            assert!(is_grid_content_row(row), "expected a content row: {row:?}");
        }

        assert!(!is_grid_content_row("+---+---+")); // separator, not content
        assert!(!is_grid_content_row("no pipes here"));
    }

    #[test]
    fn test_basic_grid_table() {
        let input = vec![
            "+-------+-------+",
            "| Col1  | Col2  |",
            "+=======+=======+",
            "| A     | B     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_multirow() {
        let input = vec![
            "+---------------+---------------+",
            "| Fruit         | Advantages    |",
            "+===============+===============+",
            "| Bananas       | - wrapper     |",
            "|               | - color       |",
            "+---------------+---------------+",
            "| Oranges       | - scurvy      |",
            "|               | - tasty       |",
            "+---------------+---------------+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_with_footer() {
        let input = vec![
            "+-------+-------+",
            "| Fruit | Price |",
            "+=======+=======+",
            "| Apple | $1.00 |",
            "+-------+-------+",
            "| Pear  | $1.50 |",
            "+=======+=======+",
            "| Total | $2.50 |",
            "+=======+=======+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(9));
    }

    #[test]
    fn test_grid_table_headerless() {
        let input = vec![
            "+-------+-------+",
            "| A     | B     |",
            "+-------+-------+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        assert_eq!(consumed_lines(input, 0), Some(5));
    }

    #[test]
    fn test_grid_table_with_caption_before() {
        let input = vec![
            ": Sample table",
            "",
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
        ];

        // Dispatch at the top border (line 2); the result should include
        // caption + blank + table.
        assert_eq!(consumed_lines(input, 2), Some(7));
    }

    #[test]
    fn test_grid_table_with_caption_after() {
        let input = vec![
            "+-------+-------+",
            "| A     | B     |",
            "+=======+=======+",
            "| C     | D     |",
            "+-------+-------+",
            "",
            "Table: My grid table",
            "",
        ];

        // table + blank + caption
        assert_eq!(consumed_lines(input, 0), Some(7));
    }
}
2488
2489// ============================================================================
2490// Multiline Table Parsing
2491// ============================================================================
2492
2493/// Check if a line is a multiline table separator (continuous dashes).
2494/// Multiline table separators span the full width and are all dashes.
2495/// Returns Some(columns) if valid, None otherwise.
2496fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2497    let trimmed = line.trim_start();
2498    let leading_spaces = line.len() - trimmed.len();
2499
2500    // Must have leading spaces <= 3 to not be a code block
2501    if leading_spaces > 3 {
2502        return None;
2503    }
2504
2505    let trimmed = trimmed.trim_end();
2506
2507    // Must be all dashes (continuous line of dashes)
2508    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2509        return None;
2510    }
2511
2512    // Must have at least 3 dashes
2513    if trimmed.len() < 3 {
2514        return None;
2515    }
2516
2517    // This is a full-width separator - columns will be determined by column separator lines
2518    Some(vec![Column {
2519        start: leading_spaces,
2520        end: leading_spaces + trimmed.len(),
2521        alignment: Alignment::Default,
2522    }])
2523}
2524
2525/// Check if a line is a column separator line for multiline tables.
2526/// Column separators have dashes with spaces between them to define columns.
2527fn is_column_separator(line: &str) -> bool {
2528    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2529}
2530
2531fn is_headerless_single_row_without_blank(
2532    lines: &(impl LineView + ?Sized),
2533    row_start: usize,
2534    row_end: usize,
2535    columns: &[Column],
2536) -> bool {
2537    if row_start >= row_end {
2538        return false;
2539    }
2540
2541    if row_end - row_start == 1 {
2542        return false;
2543    }
2544
2545    let Some(last_col) = columns.last() else {
2546        return false;
2547    };
2548
2549    for i in (row_start + 1)..row_end {
2550        let (content, _) = strip_newline(lines.line(i));
2551        let prefix_end = last_col.start.min(content.len());
2552        if !content[..prefix_end].trim().is_empty() {
2553            return false;
2554        }
2555    }
2556
2557    true
2558}
2559
2560/// Try to parse a multiline table starting at the given position.
2561/// Returns the number of lines consumed if successful.
2562pub(crate) fn try_parse_multiline_table(
2563    window: &StrippedLines<'_, '_>,
2564    builder: &mut GreenNodeBuilder<'static>,
2565    config: &ParserOptions,
2566) -> Option<usize> {
2567    let lines = window.raw();
2568    let start_pos = window.pos();
2569    if start_pos >= lines.len() {
2570        return None;
2571    }
2572
2573    // Cheap gate: a multiline table's first line is either a full-width dash
2574    // separator or a column separator. Table detection runs at every block
2575    // start, so any per-line work for every paragraph that can't begin a
2576    // multiline table was quadratic on large documents. Peek just the dispatch
2577    // line via `strip_at` and bail before any further scanning.
2578    let first_line = window.strip_at(start_pos);
2579
2580    // First line can be either:
2581    // 1. A full-width dash separator (for tables with headers)
2582    // 2. A column separator (for headerless tables)
2583    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2584    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2585    if !is_full_width_start && !is_column_sep_start {
2586        return None;
2587    }
2588
2589    // Detection scans read the container-prefix-stripped view lazily through the
2590    // window (see `LineView`) so a multiline table nested in `list → blockquote`
2591    // (e.g. `- > ----`) has its `  > ` prefix removed before the
2592    // separator/blank-row shape checks. The interior `>`-only row then strips to
2593    // `""` and registers as a blank row separator. With an empty prefix the
2594    // stripped view equals the raw lines. Scans stop at the first blank/closing
2595    // line, so only a bounded range is stripped. Emission re-emits the prefix
2596    // bytes as tokens via the window; captions read raw `lines`.
2597    let headerless_columns = if is_column_sep_start {
2598        try_parse_table_separator(window.line(start_pos))
2599    } else {
2600        None
2601    };
2602
2603    // Look ahead to find the structure
2604    let mut pos = start_pos + 1;
2605    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2606    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2607    let mut has_header = false;
2608    let mut found_blank_line = false;
2609    let mut found_closing_sep = false;
2610    let mut content_line_count = 0usize;
2611
2612    // Scan for header section and column separator
2613    while pos < lines.len() {
2614        let line = window.line(pos);
2615
2616        // Check for column separator (defines columns) - only if we started with full-width
2617        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2618            found_column_sep = true;
2619            column_sep_pos = pos;
2620            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2621            pos += 1;
2622            continue;
2623        }
2624
2625        // Check for blank line (row separator in body)
2626        if line.trim().is_empty() {
2627            found_blank_line = true;
2628            pos += 1;
2629            // Check if next line is a valid closing separator for this table shape.
2630            if pos < lines.len() {
2631                let next = window.line(pos);
2632                let is_valid_closer = if is_full_width_start {
2633                    try_parse_multiline_separator(next).is_some()
2634                } else {
2635                    is_column_separator(next)
2636                };
2637                if is_valid_closer {
2638                    found_closing_sep = true;
2639                    pos += 1; // Include the closing separator
2640                    break;
2641                }
2642            }
2643            continue;
2644        }
2645
2646        // Check for closing full-width dashes (only for full-width-start tables).
2647        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2648            found_closing_sep = true;
2649            pos += 1;
2650            break;
2651        }
2652
2653        // Check for closing column separator (for headerless tables)
2654        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2655            found_closing_sep = true;
2656            pos += 1;
2657            break;
2658        }
2659
2660        // Content row
2661        content_line_count += 1;
2662        pos += 1;
2663    }
2664
2665    // Must have found a column separator to be a valid multiline table
2666    if !found_column_sep {
2667        return None;
2668    }
2669
2670    // Must have had at least one blank line between rows (distinguishes from simple tables)
2671    if !found_blank_line {
2672        if !is_column_sep_start {
2673            return None;
2674        }
2675        let columns = headerless_columns.as_deref()?;
2676        if !is_headerless_single_row_without_blank(window, start_pos + 1, pos - 1, columns) {
2677            return None;
2678        }
2679    }
2680
2681    // Must have a closing separator
2682    if !found_closing_sep {
2683        return None;
2684    }
2685
2686    // Must have consumed more than just the opening separator
2687    if pos <= start_pos + 2 {
2688        return None;
2689    }
2690
2691    let end_pos = pos;
2692
2693    // Extract column boundaries from the separator line
2694    let columns = try_parse_table_separator(window.line(column_sep_pos))
2695        .expect("Column separator must be valid");
2696
2697    // Check for caption before table
2698    let caption_before = find_caption_before_table(window, start_pos);
2699
2700    // Check for caption after table
2701    let caption_after = if caption_before.is_some() {
2702        None
2703    } else {
2704        find_caption_after_table(window, end_pos)
2705    };
2706
2707    // Build the multiline table
2708    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2709
2710    // Emit caption before if present
2711    if let Some((cap_start, cap_end)) = caption_before {
2712        emit_table_caption(builder, window, cap_start, cap_end, config);
2713        // Emit blank line between caption and table if present
2714        emit_caption_blank_lines(builder, window, cap_end, start_pos);
2715    }
2716
2717    // Emit opening separator. The dispatch line's prefix was already consumed
2718    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
2719    // re-emits its `  > ` prefix via `emit_prefix_at`.
2720    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2721    let tail = window.emit_or_dispatch_tail(builder, start_pos);
2722    emit_line_tokens(builder, tail);
2723    builder.finish_node();
2724
2725    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
2726    // up a multi-line row so each line's container prefix can be re-emitted via
2727    // the window.
2728    let mut in_header = has_header;
2729    let mut current_row_indices: Vec<usize> = Vec::new();
2730
2731    for i in (start_pos + 1)..end_pos {
2732        let line = window.line(i);
2733        // Column separator (header/body divider)
2734        if i == column_sep_pos {
2735            // Emit any accumulated header lines
2736            if !current_row_indices.is_empty() {
2737                emit_multiline_table_row(
2738                    builder,
2739                    window,
2740                    &current_row_indices,
2741                    &columns,
2742                    SyntaxKind::TABLE_HEADER,
2743                    config,
2744                );
2745                current_row_indices.clear();
2746            }
2747
2748            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2749            let tail = window.emit_or_dispatch_tail(builder, i);
2750            emit_line_tokens(builder, tail);
2751            builder.finish_node();
2752            in_header = false;
2753            continue;
2754        }
2755
2756        // Closing separator (full-width or column separator at end)
2757        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2758            // Emit any accumulated row lines
2759            if !current_row_indices.is_empty() {
2760                let kind = if in_header {
2761                    SyntaxKind::TABLE_HEADER
2762                } else {
2763                    SyntaxKind::TABLE_ROW
2764                };
2765                emit_multiline_table_row(
2766                    builder,
2767                    window,
2768                    &current_row_indices,
2769                    &columns,
2770                    kind,
2771                    config,
2772                );
2773                current_row_indices.clear();
2774            }
2775
2776            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2777            let tail = window.emit_or_dispatch_tail(builder, i);
2778            emit_line_tokens(builder, tail);
2779            builder.finish_node();
2780            continue;
2781        }
2782
2783        // Blank line (row separator)
2784        if line.trim().is_empty() {
2785            // Emit accumulated row
2786            if !current_row_indices.is_empty() {
2787                let kind = if in_header {
2788                    SyntaxKind::TABLE_HEADER
2789                } else {
2790                    SyntaxKind::TABLE_ROW
2791                };
2792                emit_multiline_table_row(
2793                    builder,
2794                    window,
2795                    &current_row_indices,
2796                    &columns,
2797                    kind,
2798                    config,
2799                );
2800                current_row_indices.clear();
2801            }
2802
2803            // Re-emit the interior `>`-only separator row's container prefix
2804            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
2805            builder.start_node(SyntaxKind::BLANK_LINE.into());
2806            let tail = window.emit_or_dispatch_tail(builder, i);
2807            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
2808            builder.finish_node();
2809            continue;
2810        }
2811
2812        // Content line - accumulate for current row
2813        current_row_indices.push(i);
2814    }
2815
2816    // Emit any remaining accumulated lines
2817    if !current_row_indices.is_empty() {
2818        let kind = if in_header {
2819            SyntaxKind::TABLE_HEADER
2820        } else {
2821            SyntaxKind::TABLE_ROW
2822        };
2823        emit_multiline_table_row(
2824            builder,
2825            window,
2826            &current_row_indices,
2827            &columns,
2828            kind,
2829            config,
2830        );
2831    }
2832
2833    // Emit caption after if present
2834    if let Some((cap_start, cap_end)) = caption_after {
2835        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2836        emit_table_caption(builder, window, cap_start, cap_end, config);
2837    }
2838
2839    builder.finish_node(); // MultilineTable
2840
2841    // Calculate lines consumed
2842    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2843    let table_end = if let Some((_, cap_end)) = caption_after {
2844        cap_end
2845    } else {
2846        end_pos
2847    };
2848
2849    Some(table_end - table_start)
2850}
2851
2852/// Extract cell contents from first line only (for CST emission).
2853/// Multi-line content will be in continuation TEXT tokens.
2854fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2855    let (line_content, _) = strip_newline(line);
2856    let mut cells = Vec::new();
2857
2858    for column in columns.iter() {
2859        let column_start = column_offset_to_byte_index(line_content, column.start);
2860        let column_end = column_offset_to_byte_index(line_content, column.end);
2861
2862        // Extract FULL text for this column (including whitespace)
2863        let cell_text = if column_start < column_end {
2864            &line_content[column_start..column_end]
2865        } else if column_start < line_content.len() {
2866            &line_content[column_start..]
2867        } else {
2868            ""
2869        };
2870
2871        cells.push(cell_text.to_string());
2872    }
2873
2874    cells
2875}
2876
2877/// Emit a multiline table row with inline parsing (Phase 7.1).
2878///
2879/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2880/// physical line re-emits its container prefix (`  > `) via the window before
2881/// its content. With an empty prefix the tails equal the raw lines, so emission
2882/// is byte-identical to the pre-window path.
2883fn emit_multiline_table_row(
2884    builder: &mut GreenNodeBuilder<'static>,
2885    window: &StrippedLines<'_, '_>,
2886    indices: &[usize],
2887    columns: &[Column],
2888    kind: SyntaxKind,
2889    config: &ParserOptions,
2890) {
2891    if indices.is_empty() {
2892        return;
2893    }
2894
2895    builder.start_node(kind.into());
2896
2897    // Emit the first line's container prefix as tokens, then slice cells from
2898    // the prefix-stripped tail (for CST losslessness, only the first physical
2899    // line is parsed into cells; continuation lines stay verbatim TEXT).
2900    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2901    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2902    let (trimmed, newline_str) = strip_newline(first_line);
2903    let mut current_pos = 0;
2904
2905    for (col_idx, column) in columns.iter().enumerate() {
2906        let cell_text = &cell_contents[col_idx];
2907        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2908        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2909
2910        // Emit whitespace before cell
2911        if current_pos < cell_start {
2912            builder.token(
2913                SyntaxKind::WHITESPACE.into(),
2914                &trimmed[current_pos..cell_start],
2915            );
2916        }
2917
2918        // Emit cell with inline parsing (first line content only)
2919        emit_table_cell(builder, cell_text, config);
2920
2921        current_pos = cell_end;
2922    }
2923
2924    // Emit trailing whitespace
2925    if current_pos < trimmed.len() {
2926        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
2927    }
2928
2929    // Emit newline
2930    if !newline_str.is_empty() {
2931        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2932    }
2933
2934    // Emit continuation lines as TEXT to preserve exact line structure,
2935    // re-emitting each line's container prefix first.
2936    for &idx in &indices[1..] {
2937        let tail = window.emit_or_dispatch_tail(builder, idx);
2938        emit_line_tokens(builder, tail);
2939    }
2940
2941    builder.finish_node();
2942}
2943
#[cfg(test)]
mod multiline_table_tests {
    use super::super::container_prefix::ContainerPrefix;
    use super::*;
    use crate::syntax::SyntaxNode;

    /// Run `try_parse_multiline_table` on `input`, dispatching at line 0 with
    /// an empty container prefix and default options.
    fn parse_from_top(input: Vec<&str>) -> Option<usize> {
        let prefix = ContainerPrefix::default();
        let window = StrippedLines::new(&input, 0, &prefix);
        let mut builder = GreenNodeBuilder::new();
        try_parse_multiline_table(&window, &mut builder, &ParserOptions::default())
    }

    /// Emit `text` through `emit_table_cell` with default options and return
    /// the resulting tree's root.
    fn cell_tree(text: &str) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        emit_table_cell(&mut builder, text, &ParserOptions::default());
        SyntaxNode::new_root(builder.finish())
    }

    #[test]
    fn test_multiline_separator_detection() {
        let accepted = [
            "-------------------------------------------------------------",
            "---",
            "  -----", // with leading spaces
        ];
        for line in accepted {
            assert!(
                try_parse_multiline_separator(line).is_some(),
                "expected a separator: {line:?}"
            );
        }

        let rejected = [
            "--",      // too short
            "--- ---", // has spaces
            "+---+",   // grid separator
        ];
        for line in rejected {
            assert!(
                try_parse_multiline_separator(line).is_none(),
                "expected no separator: {line:?}"
            );
        }
    }

    #[test]
    fn test_basic_multiline_table() {
        let consumed = parse_from_top(vec![
            "-------------------------------------------------------------",
            " Centered   Default           Right Left",
            "  Header    Aligned         Aligned Aligned",
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "-------------------------------------------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(9));
    }

    #[test]
    fn test_multiline_table_headerless() {
        let consumed = parse_from_top(vec![
            "----------- ------- --------------- -------------------------",
            "   First    row                12.0 Example of a row that",
            "                                    spans multiple lines.",
            "",
            "  Second    row                 5.0 Here's another one.",
            "----------- ------- --------------- -------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_multiline_table_headerless_single_line_is_not_multiline() {
        let consumed = parse_from_top(vec![
            "-------     ------ ----------   -------",
            "     12     12        12             12",
            "-------     ------ ----------   -------",
            "",
            "Not part of table.",
            "",
        ]);

        assert_eq!(consumed, None);
    }

    #[test]
    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
        let consumed = parse_from_top(vec![
            "----------  ---------  -----------  ---------------------------",
            "   First    row               12.0  Example of a row that spans",
            "                                    multiple lines.",
            "----------  ---------  -----------  ---------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(4));
    }

    #[test]
    fn test_multiline_table_with_caption() {
        let consumed = parse_from_top(vec![
            "-------------------------------------------------------------",
            " Col1       Col2",
            "----------- -------",
            "   A        B",
            "",
            "-------------------------------------------------------------",
            "",
            "Table: Here's the caption.",
            "",
        ]);

        // table (6 lines) + blank + caption
        assert_eq!(consumed, Some(8));
    }

    #[test]
    fn test_multiline_table_single_row() {
        let consumed = parse_from_top(vec![
            "---------------------------------------------",
            " Header1    Header2",
            "----------- -----------",
            "   Data     More data",
            "",
            "---------------------------------------------",
            "",
        ]);

        assert_eq!(consumed, Some(6));
    }

    #[test]
    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
        // The `---` rule much further down must not be taken as the closing
        // separator for the `- - - - -` line at the top.
        let consumed = parse_from_top(vec![
            "- - - - -",
            "Third section with underscores.",
            "",
            "_____",
            "",
            "> Quote before rule",
            ">",
            "> ***",
            ">",
            "> Quote after rule",
            "",
            "Final paragraph.",
            "",
            "Here's a horizontal rule:",
            "",
            "---",
            "Text directly after the horizontal rule.",
            "",
        ]);

        assert_eq!(consumed, None);
    }

    #[test]
    fn test_not_multiline_table() {
        // A simple table must not be parsed as multiline: its first line is
        // neither a full-width dash border nor a column separator.
        let consumed = parse_from_top(vec![
            "  Right     Left     Center     Default",
            "-------     ------ ----------   -------",
            "     12     12        12            12",
            "",
        ]);

        assert_eq!(consumed, None);
    }

    // Phase 7.1: unit tests for the emit_table_cell() helper.

    #[test]
    fn test_emit_table_cell_plain_text() {
        let cell = cell_tree("Cell");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "Cell");

        // Exactly one child element: a TEXT token.
        let kinds: Vec<SyntaxKind> = cell.children_with_tokens().map(|el| el.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::TEXT]);
    }

    #[test]
    fn test_emit_table_cell_with_emphasis() {
        let cell = cell_tree("*italic*");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "*italic*");

        // Exactly one child node: EMPHASIS.
        let kinds: Vec<SyntaxKind> = cell.children().map(|n| n.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::EMPHASIS]);
    }

    #[test]
    fn test_emit_table_cell_with_code() {
        let cell = cell_tree("`code`");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "`code`");

        // Exactly one child node: INLINE_CODE.
        let kinds: Vec<SyntaxKind> = cell.children().map(|n| n.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::INLINE_CODE]);
    }

    #[test]
    fn test_emit_table_cell_with_link() {
        let cell = cell_tree("[text](url)");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "[text](url)");

        // Exactly one child node: LINK.
        let kinds: Vec<SyntaxKind> = cell.children().map(|n| n.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::LINK]);
    }

    #[test]
    fn test_emit_table_cell_with_strong() {
        let cell = cell_tree("**bold**");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "**bold**");

        // Exactly one child node: STRONG.
        let kinds: Vec<SyntaxKind> = cell.children().map(|n| n.kind()).collect();
        assert_eq!(kinds, [SyntaxKind::STRONG]);
    }

    #[test]
    fn test_emit_table_cell_mixed_inline() {
        let cell = cell_tree("Text **bold** and `code`");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "Text **bold** and `code`");

        // At least four child elements are expected (TEXT, STRONG, TEXT,
        // INLINE_CODE); only the first two kinds are pinned.
        let kinds: Vec<SyntaxKind> = cell.children_with_tokens().map(|el| el.kind()).collect();
        assert!(kinds.len() >= 4);
        assert_eq!(kinds[0], SyntaxKind::TEXT);
        assert_eq!(kinds[1], SyntaxKind::STRONG);
    }

    #[test]
    fn test_emit_table_cell_empty() {
        let cell = cell_tree("");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        assert_eq!(cell.text(), "");

        // An empty cell has no child elements at all.
        assert_eq!(cell.children_with_tokens().count(), 0);
    }

    #[test]
    fn test_emit_table_cell_escaped_pipe() {
        let cell = cell_tree(r"A \| B");

        assert_eq!(cell.kind(), SyntaxKind::TABLE_CELL);
        // The escaped pipe is preserved verbatim.
        assert_eq!(cell.text(), r"A \| B");
    }
}