Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, emit_separator_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Read-only indexed view over lines for table detection scans. Two
/// backings:
///
/// - `[&str]` — a raw, unstripped line buffer, used by callers that scan
///   the source directly (the block dispatcher's caption lookahead, list
///   and definition-list probes).
/// - [`StrippedLines`] / [`UniformStripView`] — a container-prefix-stripped
///   view that strips each line lazily on access via
///   [`StrippedLines::strip_at`]. Detection scans touch only a bounded
///   range (they stop at the first blank line), so this stays
///   O(scanned lines) rather than materializing the whole buffer. The old
///   `strip_all` collected `0..raw.len()` on every call, which was
///   quadratic when table detection runs at every block start inside a
///   large blockquote or list.
///
/// Indices are absolute positions in the underlying buffer for every
/// backing, so a position found through one view can be used with the
/// window it wraps.
pub(crate) trait LineView {
    /// The line at absolute index `i`.
    ///
    /// Callers in this file always check `i < line_count()` first; the
    /// `[&str]` backing indexes the slice directly and panics otherwise.
    fn line(&self, i: usize) -> &str;
    /// Total number of lines (absolute upper bound for indices).
    fn line_count(&self) -> usize;
}
36
/// Raw backing: lines are returned exactly as stored, with no prefix
/// stripping.
impl LineView for [&str] {
    // Plain slice indexing: panics when `i` is out of bounds.
    fn line(&self, i: usize) -> &str {
        self[i]
    }
    fn line_count(&self) -> usize {
        self.len()
    }
}
45
/// Stripped backing: each access goes through `strip_at`, while the line
/// count is that of the underlying raw buffer.
impl<'a, 'p> LineView for StrippedLines<'a, 'p> {
    // Strips only the requested line, on demand.
    fn line(&self, i: usize) -> &str {
        self.strip_at(i)
    }
    // Counts raw lines, so indices stay absolute.
    fn line_count(&self) -> usize {
        self.raw().len()
    }
}
54
/// A [`LineView`] over a [`StrippedLines`] window that strips *every* line —
/// including the dispatch line — with the full container strip rather than
/// the emission-safe line-0 strip. Grid-border detection needs this: a
/// `+---+` border sitting at column 0 of a list item's inner content must
/// not retain the list indent, or the strict column-0 check in
/// `try_parse_grid_separator` would reject it. Emission still goes through
/// the window, which preserves the indent bytes. This reproduces the old
/// grid path's `stripped[dispatch] = prefix.strip(...)` override, but
/// lazily.
///
/// The wrapper only borrows the window; it holds no state of its own.
pub(crate) struct UniformStripView<'s, 'a, 'p>(&'s StrippedLines<'a, 'p>);
65
/// Applies the window's container prefix strip to the raw line at every
/// index, the dispatch line included.
impl<'s, 'a, 'p> LineView for UniformStripView<'s, 'a, 'p> {
    // Indexes the raw buffer directly (panics when `i` is out of bounds) and
    // strips it with the window's prefix instead of calling `strip_at`.
    fn line(&self, i: usize) -> &str {
        self.0.prefix().strip(self.0.raw()[i])
    }
    // Same absolute line count as the wrapped window's raw buffer.
    fn line_count(&self) -> usize {
        self.0.raw().len()
    }
}
74
/// Horizontal alignment of a table column.
///
/// For simple tables this is assigned per column by `determine_alignments`,
/// from how the header text sits relative to the separator's dash run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Header text is flush with the dashes on the left side only.
    Left,
    /// Header text is flush with the dashes on the right side only.
    Right,
    /// Header text is flush with the dashes on neither side.
    Center,
    /// Header text is flush on both sides, or there is no header text to
    /// measure (headerless table or empty header cell).
    Default,
}
82
/// Column information extracted from the separator line.
///
/// `start`/`end` describe one run of dashes as a half-open range. Because
/// the separator line consists only of ASCII dashes and spaces, these byte
/// indices are also character offsets; rows that may contain multibyte text
/// must map them through `column_offset_to_byte_index` before slicing.
#[derive(Debug, Clone)]
pub(crate) struct Column {
    /// Start position (byte index) in the line
    start: usize,
    /// End position (byte index, exclusive) in the line
    end: usize,
    /// Column alignment
    alignment: Alignment,
}
93
94/// Try to detect if a line is a table separator line.
95/// Returns Some(column positions) if it's a valid separator.
96pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
97    let trimmed = line.trim_start();
98    // Strip trailing newline if present (CRLF or LF)
99    let (trimmed, newline_str) = strip_newline(trimmed);
100    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
101
102    // Must have leading spaces <= 3 to not be a code block
103    if leading_spaces > 3 {
104        return None;
105    }
106
107    // Simple tables only use dashed separators.
108    if trimmed.contains('*') || trimmed.contains('_') {
109        return None;
110    }
111
112    // Must contain at least one dash
113    if !trimmed.contains('-') {
114        return None;
115    }
116
117    // A separator line consists of dashes and spaces
118    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
119        return None;
120    }
121
122    // Must not be a horizontal rule.
123    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
124    if dash_groups.len() <= 1 {
125        return None;
126    }
127
128    // Extract column positions from dash groups
129    let columns = extract_columns(trimmed, leading_spaces);
130
131    if columns.is_empty() {
132        return None;
133    }
134
135    Some(columns)
136}
137
138/// Extract column positions from a separator line.
139fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
140    let mut columns = Vec::new();
141    let mut in_dashes = false;
142    let mut col_start = 0;
143
144    for (i, ch) in separator.char_indices() {
145        match ch {
146            '-' if !in_dashes => {
147                col_start = i + offset;
148                in_dashes = true;
149            }
150            ' ' if in_dashes => {
151                columns.push(Column {
152                    start: col_start,
153                    end: i + offset,
154                    alignment: Alignment::Default, // Will be determined later
155                });
156                in_dashes = false;
157            }
158            _ => {}
159        }
160    }
161
162    // Handle last column
163    if in_dashes {
164        columns.push(Column {
165            start: col_start,
166            end: separator.len() + offset,
167            alignment: Alignment::Default,
168        });
169    }
170
171    columns
172}
173
/// Map a character offset within `line` to the matching UTF-8 byte index.
///
/// Column boundaries are measured on the ASCII separator line, where one
/// character is one byte. Header and data rows can hold multibyte text, so
/// the offset has to be translated before it is used to slice them. Offsets
/// at or beyond the end of `line` map to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
184
/// Split a table-caption marker off the front of `line`.
///
/// Recognized markers are `Table:`, `table:` and a bare `:`. The bare form
/// must be followed by whitespace, which keeps constructs such as the
/// fenced-div fence `:::` from being read as captions. Lines indented by
/// more than three columns are never captions (they would be code blocks).
///
/// Returns `Some((prefix_len, rest))`, where `prefix_len` counts the indent
/// plus the marker and `rest` is everything after the marker.
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    // Keyword markers need no particular character after the colon.
    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // Bare `:` marker (Pandoc-style): whitespace must follow.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
213
/// Check if a line could be the start of a table caption.
///
/// True exactly when `try_parse_caption_prefix` recognizes a caption marker
/// (`Table:`, `table:`, or `:` followed by whitespace) on the line.
fn is_table_caption_start(line: &str) -> bool {
    try_parse_caption_prefix(line).is_some()
}
218
/// Whether the line, after its leading whitespace, begins with exactly one
/// colon — i.e. a `:` that is not immediately followed by another `:`.
///
/// This only looks at the marker shape; it does not require whitespace after
/// the colon.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
223
224fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
225    let Some((_, rest)) = try_parse_caption_prefix(line) else {
226        return false;
227    };
228    let trimmed = rest.trim_start();
229    trimmed.starts_with("```") || trimmed.starts_with("~~~")
230}
231
/// Whether the line is a fenced-div fence: optional leading whitespace, at
/// least three colons, then either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    // `:` is a single byte, so the byte difference is the colon count.
    let colon_count = body.len() - after_colons.len();
    colon_count >= 3
        && (after_colons.is_empty() || after_colons.starts_with(char::is_whitespace))
}
241
242fn is_valid_caption_start_before_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
243    if !is_table_caption_start(lines.line(pos)) {
244        return false;
245    }
246
247    if is_bare_colon_caption_start(lines.line(pos))
248        && bare_colon_caption_looks_like_definition_code_block(lines.line(pos))
249    {
250        return false;
251    }
252
253    // Avoid stealing definition-list definitions (":   ...") as table captions.
254    if is_bare_colon_caption_start(lines.line(pos))
255        && pos > 0
256        && !lines.line(pos - 1).trim().is_empty()
257        && !line_is_fenced_div_fence(lines.line(pos - 1))
258    {
259        return false;
260    }
261    true
262}
263
/// Check if a line could be the start of a grid table.
/// Grid tables start with a separator line like +---+---+ or +===+===+
///
/// True exactly when `try_parse_grid_separator` accepts the line.
fn is_grid_table_start(line: &str) -> bool {
    try_parse_grid_separator(line).is_some()
}
269
/// Check if a line could be the start of a multiline table.
/// Multiline tables start with either:
/// - A full-width dash separator (----)
/// - A column separator with dashes and spaces (---- ---- ----)
///
/// The first form is tested with `try_parse_multiline_separator`, the second
/// with `is_column_separator`; either one is sufficient.
fn is_multiline_table_start(line: &str) -> bool {
    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
}
277
278/// Check if there's a table following a potential caption at this position.
279/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
280pub(crate) fn is_caption_followed_by_table(
281    lines: &(impl LineView + ?Sized),
282    caption_pos: usize,
283) -> bool {
284    if caption_pos >= lines.line_count() {
285        return false;
286    }
287
288    // Caption must start with a caption prefix
289    if !is_valid_caption_start_before_table(lines, caption_pos) {
290        return false;
291    }
292
293    let mut pos = caption_pos + 1;
294
295    // Skip continuation lines of caption (non-blank lines).
296    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
297    // must not be folded into the caption.
298    while pos < lines.line_count()
299        && !lines.line(pos).trim().is_empty()
300        && !line_is_fenced_div_fence(lines.line(pos))
301    {
302        // If we hit a table separator, we found a table
303        if try_parse_table_separator(lines.line(pos)).is_some() {
304            return true;
305        }
306        pos += 1;
307    }
308
309    // Skip one blank line
310    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
311        pos += 1;
312    }
313
314    // Check for a table grid at the next position.
315    table_grid_starts_at(lines, pos)
316}
317
318/// Cheap lookahead: does any table kind's grid begin at absolute line `pos`?
319///
320/// This is the lightweight twin of the block dispatcher's `first_kind_at`,
321/// which answers the same "is there a table here?" question by attempting a
322/// full parse of each kind in turn. We deliberately do **not** call that from
323/// the caption lookahead: caption detection runs at every block start, and a
324/// full per-kind parse there would reintroduce the O(n²) blowup the bounded
325/// separator probe exists to avoid. To keep the two predicates in agreement,
326/// this calls the same primitive separator detectors the real parsers gate on
327/// (`is_grid_table_start` → `try_parse_grid_separator`, `is_multiline_table_start`
328/// → `try_parse_multiline_separator`/`is_column_separator`,
329/// `try_parse_table_separator`, `try_parse_pipe_separator`).
330fn table_grid_starts_at(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
331    if pos >= lines.line_count() {
332        return false;
333    }
334    let line = lines.line(pos);
335
336    // Grid table start (`+---+---+` or `+===+===+`).
337    if is_grid_table_start(line) {
338        return true;
339    }
340
341    // Multiline table start (`----` or `---- ---- ----`).
342    if is_multiline_table_start(line) {
343        return true;
344    }
345
346    // Separator line (simple/pipe table, headerless).
347    if try_parse_table_separator(line).is_some() {
348        return true;
349    }
350
351    // Header line followed by a separator (simple/pipe table with header).
352    if pos + 1 < lines.line_count() && !line.trim().is_empty() {
353        let next_line = lines.line(pos + 1);
354        if try_parse_table_separator(next_line).is_some()
355            || try_parse_pipe_separator(next_line).is_some()
356        {
357            return true;
358        }
359    }
360
361    false
362}
363
364fn caption_range_starting_at(
365    lines: &(impl LineView + ?Sized),
366    start: usize,
367) -> Option<(usize, usize)> {
368    if start >= lines.line_count() || !is_table_caption_start(lines.line(start)) {
369        return None;
370    }
371    let mut end = start + 1;
372    while end < lines.line_count()
373        && !lines.line(end).trim().is_empty()
374        && !line_is_fenced_div_fence(lines.line(end))
375    {
376        end += 1;
377    }
378    Some((start, end))
379}
380
381/// Find caption before table (if any).
382/// Returns (caption_start, caption_end) positions, or None.
383fn find_caption_before_table(
384    lines: &(impl LineView + ?Sized),
385    table_start: usize,
386) -> Option<(usize, usize)> {
387    if table_start == 0 {
388        return None;
389    }
390
391    // Look backward for a caption
392    // Caption must be immediately before table (with possible blank line between)
393    let mut pos = table_start - 1;
394
395    // Skip one blank line if present
396    if lines.line(pos).trim().is_empty() {
397        if pos == 0 {
398            return None;
399        }
400        pos -= 1;
401    }
402
403    // Now pos points to the last non-blank line before the table
404    // This could be the last line of a multiline caption, or a single-line caption
405    let caption_end = pos + 1; // End is exclusive
406
407    // If this line is NOT a caption start, it might be a continuation line
408    // Scan backward through non-blank lines to find the caption start
409    if !is_valid_caption_start_before_table(lines, pos) {
410        // Not a caption start - check if there's a caption start above
411        let mut scan_pos = pos;
412        while scan_pos > 0 {
413            scan_pos -= 1;
414            let line = lines.line(scan_pos);
415
416            // If we hit a blank line or fenced-div fence, we've gone too far
417            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
418                return None;
419            }
420
421            // If we find a caption start, this is the beginning of the multiline caption
422            if is_valid_caption_start_before_table(lines, scan_pos) {
423                if scan_pos > 0 && !lines.line(scan_pos - 1).trim().is_empty() {
424                    return None;
425                }
426                if previous_nonblank_looks_like_table(lines, scan_pos) {
427                    return None;
428                }
429                return Some((scan_pos, caption_end));
430            }
431        }
432        // Scanned to beginning without finding caption start
433        None
434    } else {
435        if pos > 0 && !lines.line(pos - 1).trim().is_empty() {
436            return None;
437        }
438        if previous_nonblank_looks_like_table(lines, pos) {
439            return None;
440        }
441        // This line is a caption start - return the range
442        Some((pos, caption_end))
443    }
444}
445
446fn previous_nonblank_looks_like_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
447    if pos == 0 {
448        return false;
449    }
450    // Skip the blank gap directly above the caption candidate.
451    let mut i = pos;
452    while i > 0 && lines.line(i - 1).trim().is_empty() {
453        i -= 1;
454    }
455    // Scan the contiguous non-blank block above for any table shape. A
456    // simple/multiline table's dashed separator sits *above* its data rows
457    // (which are plain text and don't look like table syntax on their own), so
458    // we must walk the whole block, not just the nearest line, to recognize
459    // that this caption is the caption-after of a preceding table rather than a
460    // caption-before of the following one. Stop at the next blank line or a
461    // fenced-div fence.
462    while i > 0 {
463        i -= 1;
464        if lines.line(i).trim().is_empty() || line_is_fenced_div_fence(lines.line(i)) {
465            break;
466        }
467        if line_looks_like_table_syntax(lines.line(i).trim()) {
468            return true;
469        }
470    }
471    false
472}
473
474fn line_looks_like_table_syntax(line: &str) -> bool {
475    if line.starts_with('|') && line.matches('|').count() >= 2 {
476        return true;
477    }
478    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
479        return true;
480    }
481    try_parse_table_separator(line).is_some()
482        || try_parse_pipe_separator(line).is_some()
483        || try_parse_grid_separator(line).is_some()
484}
485
486/// Find caption after table (if any).
487/// Returns (caption_start, caption_end) positions, or None.
488fn find_caption_after_table(
489    lines: &(impl LineView + ?Sized),
490    table_end: usize,
491) -> Option<(usize, usize)> {
492    if table_end >= lines.line_count() {
493        return None;
494    }
495
496    let mut pos = table_end;
497
498    // Skip one blank line if present
499    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
500        pos += 1;
501    }
502
503    if pos >= lines.line_count() {
504        return None;
505    }
506
507    // Check if this line is a caption
508    if is_table_caption_start(lines.line(pos)) {
509        let caption_start = pos;
510        // Find end of caption (continues until blank line or fenced-div fence)
511        let mut caption_end = caption_start + 1;
512        while caption_end < lines.line_count()
513            && !lines.line(caption_end).trim().is_empty()
514            && !line_is_fenced_div_fence(lines.line(caption_end))
515        {
516            caption_end += 1;
517        }
518        Some((caption_start, caption_end))
519    } else {
520        None
521    }
522}
523
524/// Emit a table caption node.
525/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
526/// the text ends with a balanced `{...}` block, lift it into a structural
527/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
528/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
529/// the id).
530fn emit_caption_line_text(
531    builder: &mut GreenNodeBuilder<'static>,
532    text_with_newline: &str,
533    config: &ParserOptions,
534    lift_trailing_attrs: bool,
535) {
536    let (text, newline_str) = strip_newline(text_with_newline);
537
538    if lift_trailing_attrs
539        && !text.is_empty()
540        && let Some((_attrs, before_attrs, start_brace_pos)) =
541            try_parse_trailing_attributes_with_pos(text)
542    {
543        let trimmed_len = text.trim_end().len();
544        let space = &text[before_attrs.len()..start_brace_pos];
545        let raw_attrs = &text[start_brace_pos..trimmed_len];
546        let trailing_ws = &text[trimmed_len..];
547
548        if !before_attrs.is_empty() {
549            inline_emission::emit_inlines(builder, before_attrs, config, false);
550        }
551        if !space.is_empty() {
552            builder.token(SyntaxKind::WHITESPACE.into(), space);
553        }
554        emit_attribute_node(builder, raw_attrs);
555        if !trailing_ws.is_empty() {
556            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
557        }
558        if !newline_str.is_empty() {
559            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
560        }
561        return;
562    }
563
564    if !text.is_empty() {
565        inline_emission::emit_inlines(builder, text, config, false);
566    }
567    if !newline_str.is_empty() {
568        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
569    }
570}
571
572/// Emit the blank (container-only) lines in the absolute range `[from, to)` as
573/// `BLANK_LINE` nodes. Re-emits each line's container prefix as tokens via the
574/// window, so a `>`-only blank line between a caption and its table inside a
575/// blockquote round-trips losslessly. Mirrors the interior blank-row emitter in
576/// `try_parse_multiline_table`. An empty range emits nothing.
577fn emit_caption_blank_lines(
578    builder: &mut GreenNodeBuilder<'static>,
579    window: &StrippedLines<'_, '_>,
580    from: usize,
581    to: usize,
582) {
583    for abs in from..to {
584        // `window.line` is the container-stripped view, so a `>`-only line reads
585        // as blank.
586        if window.line(abs).trim().is_empty() {
587            builder.start_node(SyntaxKind::BLANK_LINE.into());
588            let tail = window.emit_or_dispatch_tail(builder, abs);
589            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
590            builder.finish_node();
591        }
592    }
593}
594
/// Emit a `TABLE_CAPTION` node covering the absolute lines `[start, end)`.
///
/// The first line is split into optional leading `WHITESPACE`, a
/// `TABLE_CAPTION_PREFIX` token and the caption text; every later line is
/// emitted as continuation text. A trailing `{...}` attribute block is only
/// lifted on the last line of the caption.
fn emit_table_caption(
    builder: &mut GreenNodeBuilder<'static>,
    window: &StrippedLines<'_, '_>,
    start: usize,
    end: usize,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::TABLE_CAPTION.into());

    // Index (relative to `start`) of the caption's final line.
    let last_idx = (end - start).saturating_sub(1);

    for (i, abs) in (start..end).enumerate() {
        let lift_attrs = i == last_idx;

        // Re-emit this caption line's container prefix (`>`/whitespace) as
        // tokens — except the dispatch line, whose prefix the core already
        // emitted — and operate on the stripped `tail`, so the caption prefix
        // (`Table:`/`:`) is recognized inside a blockquote or list rather than
        // swallowed into the caption text (which doubled the marker and broke
        // losslessness).
        let tail = window.emit_or_dispatch_tail(builder, abs);

        if i == 0 {
            // First line - parse and emit prefix separately
            let trimmed = tail.trim_start();
            let leading_ws_len = tail.len() - trimmed.len();

            // Emit leading whitespace if present
            if leading_ws_len > 0 {
                builder.token(SyntaxKind::WHITESPACE.into(), &tail[..leading_ws_len]);
            }

            // Check for caption prefix and emit separately
            // The line without its indent and without a trailing `\n`; only
            // used to recognize the prefix below.
            let prefix_and_rest = if tail.ends_with('\n') {
                &tail[leading_ws_len..tail.len() - 1] // Exclude newline
            } else {
                &tail[leading_ws_len..]
            };

            // The keyword forms are matched together with one trailing space,
            // which then becomes part of the prefix token.
            // NOTE(review): `try_parse_caption_prefix` accepts `Table:` with no
            // space after it, but such a line matches none of these arms and is
            // emitted below without a prefix token — confirm this is intended.
            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
                (7, "Table: ")
            } else if prefix_and_rest.starts_with("table: ") {
                (7, "table: ")
            } else if prefix_and_rest.starts_with(": ") {
                (2, ": ")
            } else if prefix_and_rest.starts_with(':') {
                (1, ":")
            } else {
                (0, "")
            };

            if prefix_len > 0 {
                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);

                // Emit rest of line after prefix
                let rest_start = leading_ws_len + prefix_len;
                if rest_start < tail.len() {
                    emit_caption_line_text(builder, &tail[rest_start..], config, lift_attrs);
                }
            } else {
                // No recognized prefix, emit whole trimmed line
                emit_caption_line_text(builder, &tail[leading_ws_len..], config, lift_attrs);
            }
        } else {
            // Continuation lines - emit with inline parsing (attrs only on last line).
            emit_caption_line_text(builder, tail, config, lift_attrs);
        }
    }

    builder.finish_node(); // TABLE_CAPTION
}
667
668/// Emit a table cell with inline content parsing.
669/// This is the core helper for Phase 7.1 table inline parsing migration.
670fn emit_table_cell(
671    builder: &mut GreenNodeBuilder<'static>,
672    cell_text: &str,
673    config: &ParserOptions,
674) {
675    builder.start_node(SyntaxKind::TABLE_CELL.into());
676
677    // Parse inline content within the cell
678    if !cell_text.is_empty() {
679        inline_emission::emit_inlines(builder, cell_text, config, false);
680    }
681
682    builder.finish_node(); // TABLE_CELL
683}
684
685/// Determine column alignments based on separator and optional header.
686fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
687    for col in columns.iter_mut() {
688        let sep_slice = &separator_line[col.start..col.end];
689
690        if let Some(header) = header_line {
691            let header_start = column_offset_to_byte_index(header, col.start);
692            let header_end = column_offset_to_byte_index(header, col.end);
693
694            // Extract header text for this column
695            let header_text = if header_start < header_end {
696                header[header_start..header_end].trim()
697            } else if header_start < header.len() {
698                header[header_start..].trim()
699            } else {
700                ""
701            };
702
703            if header_text.is_empty() {
704                col.alignment = Alignment::Default;
705                continue;
706            }
707
708            // Find where the header text starts and ends within the column
709            let header_in_col = &header[header_start..header_end];
710            let text_start = header_in_col.len() - header_in_col.trim_start().len();
711            let text_end = header_in_col.trim_end().len() + text_start;
712
713            // Check dash alignment relative to text
714            let dashes_start = 0; // Dashes start at beginning of sep_slice
715            let dashes_end = sep_slice.len();
716
717            let flush_left = dashes_start == text_start;
718            let flush_right = dashes_end == text_end;
719
720            col.alignment = match (flush_left, flush_right) {
721                (true, true) => Alignment::Default,
722                (true, false) => Alignment::Left,
723                (false, true) => Alignment::Right,
724                (false, false) => Alignment::Center,
725            };
726        } else {
727            // Without header, alignment based on first row (we'll handle this later)
728            col.alignment = Alignment::Default;
729        }
730    }
731}
732
/// Try to parse a simple table starting at the given position.
/// Returns the number of lines consumed if successful.
///
/// The table starts at `window.pos()`: either the separator itself
/// (headerless table) or a header row with the separator on the next line.
/// On success a `SIMPLE_TABLE` node is emitted into `builder`, holding an
/// optional caption (before *or* after the table, never both), the optional
/// header, the separator and at least one data row. Returns `None`, without
/// emitting anything, when no table is found here.
pub(crate) fn try_parse_simple_table(
    window: &StrippedLines<'_, '_>,
    builder: &mut GreenNodeBuilder<'static>,
    config: &ParserOptions,
) -> Option<usize> {
    let lines = window.raw();
    let start_pos = window.pos();
    log::trace!("try_parse_simple_table at line {}", start_pos + 1);

    if start_pos >= lines.len() {
        return None;
    }

    // Cheap gate: a simple table's separator must sit on the dispatch line or
    // the line just after it (see `find_separator_line`). Table detection
    // runs at every block start, so doing more work than this for every
    // prose/math paragraph that can't be a table was quadratic on large
    // documents. Peek just those one or two lines via `strip_at` and bail
    // early.
    let gate_first = window.strip_at(start_pos);
    let separator_here = try_parse_table_separator(gate_first).is_some();
    let separator_next = !separator_here
        && start_pos + 1 < lines.len()
        && !gate_first.trim().is_empty()
        && try_parse_table_separator(window.strip_at(start_pos + 1)).is_some();
    if !separator_here && !separator_next {
        return None;
    }

    // Detection scans read the container-prefix-stripped view lazily through
    // the window (see `LineView`): a table nested in `list → blockquote`
    // (e.g. `- >  a   b`) has its `  > ` prefix removed before the
    // separator/column-shape checks. With an empty prefix the stripped view
    // equals the raw lines. Scans stop at the first blank line, so only a
    // bounded range is ever stripped. Emission re-emits the prefix bytes as
    // tokens via the window; the caption lookups below go through the same
    // window.

    // Look for a separator line
    let separator_pos = find_separator_line(window, start_pos)?;
    log::trace!("  found separator at line {}", separator_pos + 1);

    let separator_line = window.line(separator_pos);
    let mut columns = try_parse_table_separator(separator_line)?;

    // Determine if there's a header (separator not at start)
    let has_header = separator_pos > start_pos;
    let header_line = if has_header {
        Some(window.line(separator_pos - 1))
    } else {
        None
    };

    // Determine alignments
    determine_alignments(&mut columns, separator_line, header_line);

    // Find table end (blank line or end of input)
    let end_pos = find_table_end(window, separator_pos + 1);

    // Must have at least one data row (or it's just a separator)
    let data_rows = end_pos - separator_pos - 1;

    if data_rows == 0 {
        return None;
    }

    // Check for caption before table
    let caption_before = find_caption_before_table(window, start_pos);

    // Check for caption after table; a caption above the table takes
    // precedence, so the lookup below is skipped in that case.
    let caption_after = if caption_before.is_some() {
        None
    } else {
        find_caption_after_table(window, end_pos)
    };

    // Build the table
    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());

    // Emit caption before if present
    if let Some((cap_start, cap_end)) = caption_before {
        emit_table_caption(builder, window, cap_start, cap_end, config);
        // Emit blank line between caption and table if present
        emit_caption_blank_lines(builder, window, cap_end, start_pos);
    }

    // Emit header if present. On the dispatch line the core already emitted
    // the container prefix; only continuation rows re-emit it (via the window
    // inside `emit_table_row`).
    if has_header {
        emit_table_row(
            builder,
            window,
            separator_pos - 1,
            &columns,
            SyntaxKind::TABLE_HEADER,
            config,
        );
    }

    // Emit separator, re-emitting any continuation-line container prefix
    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
    emit_separator_tokens(builder, separator_tail);
    builder.finish_node();

    // Emit data rows (always continuation lines)
    for idx in (separator_pos + 1)..end_pos {
        emit_table_row(
            builder,
            window,
            idx,
            &columns,
            SyntaxKind::TABLE_ROW,
            config,
        );
    }

    // Emit caption after if present
    if let Some((cap_start, cap_end)) = caption_after {
        // Emit blank line before caption if needed
        emit_caption_blank_lines(builder, window, end_pos, cap_start);
        emit_table_caption(builder, window, cap_start, cap_end, config);
    }

    builder.finish_node(); // SimpleTable

    // Calculate lines consumed (including captions)
    // NOTE(review): with a caption before the table, `cap_start` lies above
    // `start_pos`, so the count includes lines preceding the dispatch line —
    // confirm callers expect that.
    let table_start = if let Some((cap_start, _)) = caption_before {
        cap_start
    } else if has_header {
        separator_pos - 1
    } else {
        separator_pos
    };

    let table_end = if let Some((_, cap_end)) = caption_after {
        cap_end
    } else {
        end_pos
    };

    let lines_consumed = table_end - table_start;

    Some(lines_consumed)
}
881
882/// Find the position of a separator line starting from pos.
883fn find_separator_line(lines: &(impl LineView + ?Sized), start_pos: usize) -> Option<usize> {
884    log::trace!("  find_separator_line from line {}", start_pos + 1);
885
886    // Check first line
887    log::trace!("    checking first line: {:?}", lines.line(start_pos));
888    if try_parse_table_separator(lines.line(start_pos)).is_some() {
889        log::trace!("    separator found at first line");
890        return Some(start_pos);
891    }
892
893    // Check second line (for table with header)
894    if start_pos + 1 < lines.line_count()
895        && !lines.line(start_pos).trim().is_empty()
896        && try_parse_table_separator(lines.line(start_pos + 1)).is_some()
897    {
898        return Some(start_pos + 1);
899    }
900    None
901}
902
903/// Find where the table ends (first blank line or end of input).
904fn find_table_end(lines: &(impl LineView + ?Sized), start_pos: usize) -> usize {
905    for i in start_pos..lines.line_count() {
906        if lines.line(i).trim().is_empty() {
907            return i;
908        }
909        // Check if this could be a closing separator
910        if try_parse_table_separator(lines.line(i)).is_some() {
911            // Check if next line is blank or end
912            if i + 1 >= lines.line_count() || lines.line(i + 1).trim().is_empty() {
913                return i + 1;
914            }
915        }
916    }
917    lines.line_count()
918}
919
920/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
921/// Uses column boundaries from the separator line to extract cells.
922fn emit_table_row(
923    builder: &mut GreenNodeBuilder<'static>,
924    window: &StrippedLines<'_, '_>,
925    abs_idx: usize,
926    columns: &[Column],
927    row_kind: SyntaxKind,
928    config: &ParserOptions,
929) {
930    builder.start_node(row_kind.into());
931
932    // On continuation lines the leading `  > ` prefix is re-emitted as
933    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
934    // stripped tail returned; the dispatch line just strips its (already
935    // core-emitted) prefix. Empty prefix ⇒ the raw line.
936    let line = window.emit_or_dispatch_tail(builder, abs_idx);
937
938    let (line_without_newline, newline_str) = strip_newline(line);
939
940    // Emit leading whitespace if present
941    let trimmed = line_without_newline.trim_start();
942    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
943    if leading_ws_len > 0 {
944        builder.token(
945            SyntaxKind::WHITESPACE.into(),
946            &line_without_newline[..leading_ws_len],
947        );
948    }
949
950    // Track where we are in the line (for losslessness)
951    let mut current_pos = 0;
952
953    // Extract and emit cells based on column boundaries
954    for col in columns.iter() {
955        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
956        let cell_start = if col.start >= leading_ws_len {
957            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
958        } else {
959            0
960        };
961
962        let cell_end = if col.end >= leading_ws_len {
963            column_offset_to_byte_index(trimmed, col.end - leading_ws_len)
964        } else {
965            0
966        };
967
968        // Extract cell text from column bounds. When the column lies entirely
969        // before the trimmed content (col.end <= leading_ws_len) both bounds
970        // clamp to 0; treat that as an empty cell rather than re-emitting the
971        // whole row.
972        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
973            &trimmed[cell_start..cell_end]
974        } else {
975            ""
976        };
977
978        let cell_content = cell_text.trim();
979        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
980
981        // Emit any whitespace from current position to start of cell content
982        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
983        if current_pos < content_abs_pos {
984            builder.token(
985                SyntaxKind::WHITESPACE.into(),
986                &trimmed[current_pos..content_abs_pos],
987            );
988        }
989
990        // Emit cell with inline parsing
991        emit_table_cell(builder, cell_content, config);
992
993        // Update current position to end of cell content
994        current_pos = content_abs_pos + cell_content.len();
995    }
996
997    // Emit any remaining whitespace after last cell
998    if current_pos < trimmed.len() {
999        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
1000    }
1001
1002    // Emit newline if present
1003    if !newline_str.is_empty() {
1004        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1005    }
1006
1007    builder.finish_node();
1008}
1009
1010// ============================================================================
1011// Pipe Table Parsing
1012// ============================================================================
1013
1014/// Check if a line is a pipe table separator line.
1015/// Returns the column alignments if it's a valid separator.
1016fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
1017    let trimmed = line.trim();
1018
1019    // Must contain at least one pipe
1020    if !trimmed.contains('|') && !trimmed.contains('+') {
1021        return None;
1022    }
1023
1024    // Split by pipes (or + for orgtbl variant)
1025    let cells: Vec<&str> = if trimmed.contains('+') {
1026        // Orgtbl variant: use + as separator in separator line
1027        trimmed.split(['|', '+']).collect()
1028    } else {
1029        trimmed.split('|').collect()
1030    };
1031
1032    let mut alignments = Vec::new();
1033
1034    for cell in cells {
1035        let cell = cell.trim();
1036
1037        // Skip empty cells (from leading/trailing pipes)
1038        if cell.is_empty() {
1039            continue;
1040        }
1041
1042        // Must be dashes with optional colons
1043        let starts_colon = cell.starts_with(':');
1044        let ends_colon = cell.ends_with(':');
1045
1046        // Remove colons to check if rest is all dashes
1047        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
1048
1049        // Must have at least one dash
1050        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
1051            return None;
1052        }
1053
1054        // Determine alignment from colon positions
1055        let alignment = match (starts_colon, ends_colon) {
1056            (true, true) => Alignment::Center,
1057            (true, false) => Alignment::Left,
1058            (false, true) => Alignment::Right,
1059            (false, false) => Alignment::Default,
1060        };
1061
1062        alignments.push(alignment);
1063    }
1064
1065    // Must have at least one column
1066    if alignments.is_empty() {
1067        None
1068    } else {
1069        Some(alignments)
1070    }
1071}
1072
/// Split a pipe-table row into its trimmed cell texts.
///
/// - A single leading `|` is a row delimiter and does not open a cell.
/// - A backslash escapes the character that follows it, so `\|` stays inside
///   the cell and `\\|` is an escaped backslash followed by a real cell
///   boundary. This matches how `emit_pipe_table_row` locates pipes.
///   Escape sequences are kept verbatim in the returned text.
/// - The text after the last unescaped `|` is added only when non-empty, so
///   a trailing pipe does not create an extra empty cell.
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();

    // Drop the optional leading pipe up front instead of tracking a
    // character counter through the loop.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    // `chars.next()` is also called inside the loop to consume the escaped
    // character, so this cannot be a plain `for` loop.
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the escape pair together; whatever follows the
                // backslash can never act as a cell boundary.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                // End current cell, start a new one.
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // The remainder is empty when the row ended with a pipe.
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
1120
1121/// Emit a pipe table row with inline-parsed cells.
1122/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
1123fn emit_pipe_table_row(
1124    builder: &mut GreenNodeBuilder<'static>,
1125    window: &StrippedLines<'_, '_>,
1126    abs_idx: usize,
1127    row_kind: SyntaxKind,
1128    config: &ParserOptions,
1129) {
1130    builder.start_node(row_kind.into());
1131
1132    // On continuation lines (separator/data rows under a list+blockquote
1133    // container) the leading `  > ` prefix is not consumed by the core;
1134    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1135    // and returns the stripped tail. On the dispatch line the core already
1136    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1137    // With an empty prefix (non-nested tables) both are no-ops returning
1138    // the raw line.
1139    let line = if abs_idx == window.dispatch_pos() {
1140        window.dispatch_tail()
1141    } else {
1142        window.emit_prefix_at(builder, abs_idx)
1143    };
1144
1145    let (line_without_newline, newline_str) = strip_newline(line);
1146    let trimmed = line_without_newline.trim();
1147
1148    // Parse cell boundaries
1149    let mut cell_starts = Vec::new();
1150    let mut cell_ends = Vec::new();
1151    let mut in_escape = false;
1152
1153    // Find all pipe positions (excluding escaped ones)
1154    let mut pipe_positions = Vec::new();
1155    for (i, ch) in trimmed.char_indices() {
1156        if in_escape {
1157            in_escape = false;
1158            continue;
1159        }
1160        if ch == '\\' {
1161            in_escape = true;
1162            continue;
1163        }
1164        if ch == '|' {
1165            pipe_positions.push(i);
1166        }
1167    }
1168
1169    // Determine cell boundaries based on pipe positions
1170    if pipe_positions.is_empty() {
1171        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1172        cell_starts.push(0);
1173        cell_ends.push(trimmed.len());
1174    } else {
1175        // Check if line starts with pipe
1176        let start_pipe = pipe_positions.first() == Some(&0);
1177        // Check if line ends with pipe
1178        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1179
1180        if start_pipe {
1181            // Skip first pipe
1182            for i in 1..pipe_positions.len() {
1183                cell_starts.push(pipe_positions[i - 1] + 1);
1184                cell_ends.push(pipe_positions[i]);
1185            }
1186            // Add last cell if there's no trailing pipe
1187            if !end_pipe {
1188                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1189                cell_ends.push(trimmed.len());
1190            }
1191        } else {
1192            // No leading pipe
1193            cell_starts.push(0);
1194            cell_ends.push(pipe_positions[0]);
1195
1196            for i in 1..pipe_positions.len() {
1197                cell_starts.push(pipe_positions[i - 1] + 1);
1198                cell_ends.push(pipe_positions[i]);
1199            }
1200
1201            // Add last cell if there's no trailing pipe
1202            if !end_pipe {
1203                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1204                cell_ends.push(trimmed.len());
1205            }
1206        }
1207    }
1208
1209    // Emit leading whitespace if present (before trim)
1210    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1211    if leading_ws_len > 0 {
1212        builder.token(
1213            SyntaxKind::WHITESPACE.into(),
1214            &line_without_newline[..leading_ws_len],
1215        );
1216    }
1217
1218    // Emit cells with pipes
1219    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1220        // Emit pipe before cell (except for first cell if no leading pipe)
1221        if *start > 0 {
1222            builder.token(SyntaxKind::TEXT.into(), "|");
1223        } else if idx == 0 && trimmed.starts_with('|') {
1224            // Leading pipe
1225            builder.token(SyntaxKind::TEXT.into(), "|");
1226        }
1227
1228        // Get cell content with its whitespace
1229        let cell_with_ws = &trimmed[*start..*end];
1230        let cell_content = cell_with_ws.trim();
1231
1232        // Emit leading whitespace within cell
1233        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1234        if !cell_leading_ws.is_empty() {
1235            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1236        }
1237
1238        // Emit cell with inline parsing
1239        emit_table_cell(builder, cell_content, config);
1240
1241        // Emit trailing whitespace within cell
1242        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1243        if cell_trailing_ws_start < cell_with_ws.len() {
1244            builder.token(
1245                SyntaxKind::WHITESPACE.into(),
1246                &cell_with_ws[cell_trailing_ws_start..],
1247            );
1248        }
1249    }
1250
1251    // Emit trailing pipe if present
1252    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1253        builder.token(SyntaxKind::TEXT.into(), "|");
1254    }
1255
1256    // Emit trailing whitespace after trim (before newline)
1257    let trailing_ws_start = leading_ws_len + trimmed.len();
1258    if trailing_ws_start < line_without_newline.len() {
1259        builder.token(
1260            SyntaxKind::WHITESPACE.into(),
1261            &line_without_newline[trailing_ws_start..],
1262        );
1263    }
1264
1265    // Emit newline
1266    if !newline_str.is_empty() {
1267        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1268    }
1269
1270    builder.finish_node();
1271}
1272
1273/// Try to parse a pipe table starting at the given position.
1274/// Returns the number of lines consumed if successful.
1275pub(crate) fn try_parse_pipe_table(
1276    window: &StrippedLines<'_, '_>,
1277    builder: &mut GreenNodeBuilder<'static>,
1278    config: &ParserOptions,
1279) -> Option<usize> {
1280    let lines = window.raw();
1281    let start_pos = window.pos();
1282    if start_pos + 1 >= lines.len() {
1283        return None;
1284    }
1285
1286    // Cheap gate: a pipe table's first line must contain a `|` (it is either
1287    // the header or, headerless, the delimiter row), unless this is a
1288    // caption-led table. Table detection runs at every block start, so doing
1289    // any per-line work for every prose/math paragraph was quadratic on large
1290    // documents. Peek the dispatch line and run the (bounded) caption probe on
1291    // the same stripped `window` the detection below uses, so the gate applies
1292    // inside containers (blockquote/list) too — not just at top level.
1293    if !window.strip_at(start_pos).contains('|') && !is_caption_followed_by_table(window, start_pos)
1294    {
1295        return None;
1296    }
1297
1298    // Detection scans read the container-prefix-stripped view lazily through
1299    // the window (see `LineView`), so a table nested in `list → blockquote`
1300    // (e.g. `- > | a | b |`) has its `  > ` prefix removed before the
1301    // separator/cell shape checks. The dispatch line uses the emission-safe
1302    // line-0 strip (its prefix was consumed by the core); every other line
1303    // gets the full continuation strip. Scans stop at the first blank line, so
1304    // only a bounded range is stripped. Emission still reads raw `lines` so the
1305    // prefix bytes can be re-emitted as tokens.
1306
1307    // Check if this line is a caption followed by a table
1308    // If so, the actual table starts after the caption and blank line
1309    let (actual_start, caption_before) = if is_caption_followed_by_table(window, start_pos) {
1310        let (cap_start, cap_end) = caption_range_starting_at(window, start_pos)?;
1311        let mut pos = cap_end;
1312        while pos < window.line_count() && window.line(pos).trim().is_empty() {
1313            pos += 1;
1314        }
1315        (pos, Some((cap_start, cap_end)))
1316    } else {
1317        (start_pos, None)
1318    };
1319
1320    if actual_start + 1 >= lines.len() {
1321        return None;
1322    }
1323
1324    // First line should have pipes (potential header)
1325    if !window.line(actual_start).contains('|') {
1326        return None;
1327    }
1328
1329    // Second line should be separator
1330    let alignments = try_parse_pipe_separator(window.line(actual_start + 1))?;
1331
1332    // Parse header cells
1333    let header_cells = parse_pipe_table_row(window.line(actual_start));
1334
1335    // Number of columns should match (approximately - be lenient)
1336    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1337        // Only fail if very different
1338        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1339            return None;
1340        }
1341    }
1342
1343    // Find table end (first blank line or end of input)
1344    let mut end_pos = actual_start + 2;
1345    while end_pos < window.line_count() {
1346        let line = window.line(end_pos);
1347        if line.trim().is_empty() {
1348            break;
1349        }
1350        // Row should have pipes
1351        if !line.contains('|') {
1352            break;
1353        }
1354        end_pos += 1;
1355    }
1356
1357    // Must have at least one data row
1358    if end_pos <= actual_start + 2 {
1359        return None;
1360    }
1361
1362    // Check for caption before table (only if we didn't already detect it)
1363    let caption_before = caption_before.or_else(|| find_caption_before_table(window, actual_start));
1364
1365    // Check for caption after table
1366    let caption_after = if caption_before.is_some() {
1367        None
1368    } else {
1369        find_caption_after_table(window, end_pos)
1370    };
1371
1372    // Build the pipe table
1373    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1374
1375    // Emit caption before if present
1376    if let Some((cap_start, cap_end)) = caption_before {
1377        emit_table_caption(builder, window, cap_start, cap_end, config);
1378        // Emit blank line between caption and table if present
1379        emit_caption_blank_lines(builder, window, cap_end, actual_start);
1380    }
1381
1382    // Emit header row with inline-parsed cells. On the dispatch line the
1383    // core already emitted the container prefix; only when the header is a
1384    // continuation line (e.g. it follows a caption-before line) do we emit
1385    // the prefix here.
1386    emit_pipe_table_row(
1387        builder,
1388        window,
1389        actual_start,
1390        SyntaxKind::TABLE_HEADER,
1391        config,
1392    );
1393
1394    // Emit separator, re-emitting any continuation-line container prefix
1395    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1396    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1397    let sep_idx = actual_start + 1;
1398    let separator_tail = if sep_idx == window.dispatch_pos() {
1399        window.dispatch_tail()
1400    } else {
1401        window.emit_prefix_at(builder, sep_idx)
1402    };
1403    emit_separator_tokens(builder, separator_tail);
1404    builder.finish_node();
1405
1406    // Emit data rows with inline-parsed cells (always continuation lines)
1407    for idx in (actual_start + 2)..end_pos {
1408        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1409    }
1410
1411    // Emit caption after if present
1412    if let Some((cap_start, cap_end)) = caption_after {
1413        // Emit blank line before caption if needed
1414        emit_caption_blank_lines(builder, window, end_pos, cap_start);
1415        emit_table_caption(builder, window, cap_start, cap_end, config);
1416    }
1417
1418    builder.finish_node(); // PipeTable
1419
1420    // Calculate lines consumed
1421    let table_start = caption_before
1422        .map(|(start, _)| start)
1423        .unwrap_or(actual_start);
1424    let table_end = if let Some((_, cap_end)) = caption_after {
1425        cap_end
1426    } else {
1427        end_pos
1428    };
1429
1430    Some(table_end - table_start)
1431}
1432
1433#[cfg(test)]
1434mod tests {
1435    use super::super::container_prefix::ContainerPrefix;
1436    use super::*;
1437
1438    #[test]
1439    fn test_separator_detection() {
1440        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
1441        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
1442        assert!(try_parse_table_separator("-------").is_none()); // horizontal rule
1443        assert!(try_parse_table_separator("--- --- ---").is_some()); // table separator
1444    }
1445
1446    #[test]
1447    fn test_column_extraction() {
1448        let line = "-------     ------ ----------   -------";
1449        let columns = extract_columns(line, 0);
1450        assert_eq!(columns.len(), 4);
1451    }
1452
1453    #[test]
1454    fn test_simple_table_with_header() {
1455        let input = vec![
1456            "  Right     Left     Center     Default",
1457            "-------     ------ ----------   -------",
1458            "     12     12        12            12",
1459            "    123     123       123          123",
1460            "",
1461        ];
1462
1463        let mut builder = GreenNodeBuilder::new();
1464        let prefix = ContainerPrefix::default();
1465        let window = StrippedLines::new(&input, 0, &prefix);
1466        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1467
1468        assert!(result.is_some());
1469        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1470    }
1471
1472    #[test]
1473    fn test_headerless_table() {
1474        let input = vec![
1475            "-------     ------ ----------   -------",
1476            "     12     12        12            12",
1477            "    123     123       123          123",
1478            "",
1479        ];
1480
1481        let mut builder = GreenNodeBuilder::new();
1482        let prefix = ContainerPrefix::default();
1483        let window = StrippedLines::new(&input, 0, &prefix);
1484        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1485
1486        assert!(result.is_some());
1487        assert_eq!(result.unwrap(), 3); // sep + 2 rows
1488    }
1489
1490    #[test]
1491    fn test_caption_prefix_detection() {
1492        assert!(try_parse_caption_prefix("Table: My caption").is_some());
1493        assert!(try_parse_caption_prefix("table: My caption").is_some());
1494        assert!(try_parse_caption_prefix(": My caption").is_some());
1495        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
1496        assert!(try_parse_caption_prefix("Not a caption").is_none());
1497    }
1498
1499    #[test]
1500    fn table_grid_starts_at_matches_each_kind() {
1501        // Positives — one shape per table kind the real parsers accept.
1502        assert!(table_grid_starts_at(&["+---+---+"][..], 0)); // grid
1503        assert!(table_grid_starts_at(&["----------- -------"][..], 0)); // multiline
1504        assert!(table_grid_starts_at(&["--- --- ---"][..], 0)); // simple, headerless
1505        assert!(table_grid_starts_at(&["A | B", "| --- | --- |"][..], 0)); // pipe, header + sep
1506        assert!(table_grid_starts_at(&["A    B", "--- ---"][..], 0)); // simple, header + sep
1507        // A lone dash run is a multiline full-width separator under Pandoc (not a
1508        // thematic break), so the lookahead intentionally accepts it; the full
1509        // parser then rejects it if no rows follow.
1510        assert!(table_grid_starts_at(&["-------"][..], 0));
1511
1512        // Negatives — shapes that must not read as a table start.
1513        assert!(!table_grid_starts_at(&["just some prose"][..], 0));
1514        assert!(!table_grid_starts_at(&["# Heading"][..], 0));
1515        assert!(!table_grid_starts_at(&["```", "code", "```"][..], 0)); // code fence
1516        assert!(!table_grid_starts_at(&["only one line"][..], 1)); // out of range
1517    }
1518
1519    /// The cheap caption lookahead must agree with what the full parser does:
1520    /// when it says a table follows the caption, a table node really forms; when
1521    /// it says no table follows, none does. This guards against the lookahead
1522    /// (`table_grid_starts_at`) drifting from the real per-kind parsers.
1523    #[test]
1524    fn caption_lookahead_agrees_with_real_parse() {
1525        let with_table = ": Cap\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
1526        let lines: Vec<&str> = with_table.lines().collect();
1527        assert!(is_caption_followed_by_table(&lines[..], 0));
1528        assert!(format!("{:#?}", crate::parse(with_table, None)).contains("PIPE_TABLE"));
1529
1530        let no_table = ": Cap\n\nplain paragraph\n";
1531        let lines: Vec<&str> = no_table.lines().collect();
1532        assert!(!is_caption_followed_by_table(&lines[..], 0));
1533        assert!(!format!("{:#?}", crate::parse(no_table, None)).contains("TABLE"));
1534    }
1535
1536    /// Pandoc parses `table` before `orderedList` (but `bulletList` before
1537    /// `table`) in its `block` choice. So an ordered marker whose line is the
1538    /// header of a valid pipe table is NOT a list: the whole construct is a
1539    /// top-level table absorbing the marker as the first header cell. Bullets
1540    /// and a lone ordered marker (no delimiter) stay lists. Verified against
1541    /// pandoc 3.9 (`-f markdown -t native`).
1542    #[test]
1543    fn ordered_marker_on_pipe_table_line_is_top_level_table() {
1544        let input = "1. | a | b |\n   | - | - |\n   | 1 | 2 |\n";
1545        let tree = crate::parse(input, None);
1546        assert!(
1547            tree.descendants()
1548                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1549            "ordered marker + pipe table on the marker line should be a top-level table"
1550        );
1551        assert!(
1552            !tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1553            "it must not nest under a list"
1554        );
1555        // Lossless: the marker and the overflow cell survive in the CST.
1556        let dump = format!("{tree:#?}");
1557        assert!(
1558            dump.contains("1."),
1559            "marker text preserved as a header cell"
1560        );
1561        assert!(dump.contains('b'), "overflow cell `b` preserved (lossless)");
1562    }
1563
1564    #[test]
1565    fn lone_ordered_marker_pipe_line_is_a_list() {
1566        // No delimiter row → pandoc's `table` fails, `orderedList` catches it.
1567        let input = "1. | a | b |\n";
1568        let tree = crate::parse(input, None);
1569        assert!(
1570            tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1571            "a lone ordered marker line stays a list"
1572        );
1573        assert!(
1574            !tree
1575                .descendants()
1576                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1577            "no table without a delimiter row"
1578        );
1579    }
1580
1581    #[test]
1582    fn bullet_marker_on_pipe_table_line_stays_a_nested_table() {
1583        // Bullets already match pandoc (`BulletList -> Table`): regression guard.
1584        let input = "- | a | b |\n  | - | - |\n  | 1 | 2 |\n";
1585        let tree = crate::parse(input, None);
1586        assert!(
1587            tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1588            "bullet marker keeps the list"
1589        );
1590        assert!(
1591            tree.descendants()
1592                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1593            "with the table nested inside the list item"
1594        );
1595    }
1596
1597    #[test]
1598    fn bare_colon_fenced_code_is_not_table_caption() {
1599        let input = "Term\n: ```\n  code\n  ```\n";
1600        let tree = crate::parse(input, None);
1601
1602        assert!(
1603            tree.descendants()
1604                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1605            "should parse as definition list"
1606        );
1607        assert!(
1608            tree.descendants()
1609                .any(|node| node.kind() == SyntaxKind::CODE_BLOCK),
1610            "definition should preserve fenced code block"
1611        );
1612        assert!(
1613            !tree
1614                .descendants()
1615                .any(|node| node.kind() == SyntaxKind::TABLE_CAPTION),
1616            "fenced code definition should not be parsed as table caption"
1617        );
1618    }
1619
1620    #[test]
1621    fn bare_colon_caption_after_div_opening_is_table_caption() {
1622        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
1623        let tree = crate::parse(input, None);
1624
1625        let caption_count = tree
1626            .descendants()
1627            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
1628            .count();
1629        assert_eq!(
1630            caption_count, 2,
1631            "expected both captions to attach to tables"
1632        );
1633        assert!(
1634            !tree
1635                .descendants()
1636                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1637            "caption lines in this fenced div table layout should not parse as definition list"
1638        );
1639    }
1640
1641    #[test]
1642    fn test_table_with_caption_after() {
1643        let input = vec![
1644            "  Right     Left     Center     Default",
1645            "-------     ------ ----------   -------",
1646            "     12     12        12            12",
1647            "    123     123       123          123",
1648            "",
1649            "Table: Demonstration of simple table syntax.",
1650            "",
1651        ];
1652
1653        let mut builder = GreenNodeBuilder::new();
1654        let prefix = ContainerPrefix::default();
1655        let window = StrippedLines::new(&input, 0, &prefix);
1656        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1657
1658        assert!(result.is_some());
1659        // Should consume: header + sep + 2 rows + blank + caption
1660        assert_eq!(result.unwrap(), 6);
1661    }
1662
1663    #[test]
1664    fn test_table_with_caption_before() {
1665        let input = vec![
1666            "Table: Demonstration of simple table syntax.",
1667            "",
1668            "  Right     Left     Center     Default",
1669            "-------     ------ ----------   -------",
1670            "     12     12        12            12",
1671            "    123     123       123          123",
1672            "",
1673        ];
1674
1675        let mut builder = GreenNodeBuilder::new();
1676        let prefix = ContainerPrefix::default();
1677        let window = StrippedLines::new(&input, 2, &prefix);
1678        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1679
1680        assert!(result.is_some());
1681        // Should consume: caption + blank + header + sep + 2 rows
1682        assert_eq!(result.unwrap(), 6);
1683    }
1684
1685    #[test]
1686    fn test_caption_with_colon_prefix() {
1687        let input = vec![
1688            "  Right     Left",
1689            "-------     ------",
1690            "     12     12",
1691            "",
1692            ": Short caption",
1693            "",
1694        ];
1695
1696        let mut builder = GreenNodeBuilder::new();
1697        let prefix = ContainerPrefix::default();
1698        let window = StrippedLines::new(&input, 0, &prefix);
1699        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1700
1701        assert!(result.is_some());
1702        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1703    }
1704
1705    #[test]
1706    fn test_multiline_caption() {
1707        let input = vec![
1708            "  Right     Left",
1709            "-------     ------",
1710            "     12     12",
1711            "",
1712            "Table: This is a longer caption",
1713            "that spans multiple lines.",
1714            "",
1715        ];
1716
1717        let mut builder = GreenNodeBuilder::new();
1718        let prefix = ContainerPrefix::default();
1719        let window = StrippedLines::new(&input, 0, &prefix);
1720        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1721
1722        assert!(result.is_some());
1723        // Should consume through end of multi-line caption
1724        assert_eq!(result.unwrap(), 6);
1725    }
1726
1727    #[test]
1728    fn test_simple_table_with_multibyte_cell_content() {
1729        let input = vec![
1730            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
1731            "--------------  ------------ ------- ---------------- ----------------- ------------",
1732            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
1733            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
1734            "",
1735        ];
1736
1737        let mut builder = GreenNodeBuilder::new();
1738        let prefix = ContainerPrefix::default();
1739        let window = StrippedLines::new(&input, 0, &prefix);
1740        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1741
1742        assert!(result.is_some());
1743        assert_eq!(result.unwrap(), 4);
1744    }
1745
1746    // Pipe table tests
1747    #[test]
1748    fn test_pipe_separator_detection() {
1749        assert!(try_parse_pipe_separator("|------:|:-----|---------|:------:|").is_some());
1750        assert!(try_parse_pipe_separator("|---|---|").is_some());
1751        assert!(try_parse_pipe_separator("-----|-----:").is_some()); // No leading pipe
1752        assert!(try_parse_pipe_separator("|-----+-------|").is_some()); // Orgtbl variant
1753        assert!(try_parse_pipe_separator("not a separator").is_none());
1754    }
1755
1756    #[test]
1757    fn test_pipe_alignments() {
1758        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
1759        assert_eq!(aligns.len(), 4);
1760        assert_eq!(aligns[0], Alignment::Right);
1761        assert_eq!(aligns[1], Alignment::Left);
1762        assert_eq!(aligns[2], Alignment::Default);
1763        assert_eq!(aligns[3], Alignment::Center);
1764    }
1765
1766    #[test]
1767    fn test_parse_pipe_table_row() {
1768        let cells = parse_pipe_table_row("| Right | Left | Center |");
1769        assert_eq!(cells.len(), 3);
1770        assert_eq!(cells[0], "Right");
1771        assert_eq!(cells[1], "Left");
1772        assert_eq!(cells[2], "Center");
1773
1774        // Without leading/trailing pipes
1775        let cells2 = parse_pipe_table_row("Right | Left | Center");
1776        assert_eq!(cells2.len(), 3);
1777    }
1778
1779    #[test]
1780    fn test_basic_pipe_table() {
1781        let input = vec![
1782            "",
1783            "| Right | Left | Center |",
1784            "|------:|:-----|:------:|",
1785            "|   12  |  12  |   12   |",
1786            "|  123  |  123 |  123   |",
1787            "",
1788        ];
1789
1790        let mut builder = GreenNodeBuilder::new();
1791        let prefix = ContainerPrefix::default();
1792        let window = StrippedLines::new(&input, 1, &prefix);
1793        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1794
1795        assert!(result.is_some());
1796        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1797    }
1798
1799    #[test]
1800    fn test_pipe_table_no_edge_pipes() {
1801        let input = vec![
1802            "",
1803            "fruit| price",
1804            "-----|-----:",
1805            "apple|2.05",
1806            "pear|1.37",
1807            "",
1808        ];
1809
1810        let mut builder = GreenNodeBuilder::new();
1811        let prefix = ContainerPrefix::default();
1812        let window = StrippedLines::new(&input, 1, &prefix);
1813        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1814
1815        assert!(result.is_some());
1816        assert_eq!(result.unwrap(), 4);
1817    }
1818
1819    #[test]
1820    fn test_pipe_table_with_caption() {
1821        let input = vec![
1822            "",
1823            "| Col1 | Col2 |",
1824            "|------|------|",
1825            "| A    | B    |",
1826            "",
1827            "Table: My pipe table",
1828            "",
1829        ];
1830
1831        let mut builder = GreenNodeBuilder::new();
1832        let prefix = ContainerPrefix::default();
1833        let window = StrippedLines::new(&input, 1, &prefix);
1834        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1835
1836        assert!(result.is_some());
1837        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1838    }
1839
1840    #[test]
1841    fn test_pipe_table_with_multiline_caption_before() {
1842        let input = vec![
1843            ": (#tab:base) base R quoting",
1844            "functions",
1845            "",
1846            "| C | D |",
1847            "|---|---|",
1848            "| 3 | 4 |",
1849            "",
1850        ];
1851
1852        let mut builder = GreenNodeBuilder::new();
1853        let prefix = ContainerPrefix::default();
1854        let window = StrippedLines::new(&input, 0, &prefix);
1855        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1856
1857        assert!(result.is_some());
1858        // caption(2) + blank(1) + header + sep + row
1859        assert_eq!(result.unwrap(), 6);
1860    }
1861}
1862
1863// ============================================================================
1864// Grid Table Parsing
1865// ============================================================================
1866
1867/// Check if a line is a grid table row separator (starts with +, contains -, ends with +).
1868/// Returns Some(vec of column info) if valid, None otherwise.
1869fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
1870    let trimmed = line.trim_start();
1871    let leading_spaces = line.len() - trimmed.len();
1872
1873    // A grid border must begin at column 0 of its container content. Detection
1874    // runs on the container-prefix-stripped line (see `try_parse_grid_table`),
1875    // so any remaining leading whitespace means the border is indented relative
1876    // to its container -- pandoc parses that as a paragraph, not a grid table.
1877    if leading_spaces > 0 {
1878        return None;
1879    }
1880
1881    // Must start with + and end with +
1882    if !trimmed.starts_with('+') || !trimmed.trim_end().ends_with('+') {
1883        return None;
1884    }
1885
1886    // Split by + to get column segments
1887    let trimmed = trimmed.trim_end();
1888    let segments: Vec<&str> = trimmed.split('+').collect();
1889
1890    // Need at least 3 parts: empty before first +, column(s), empty after last +
1891    if segments.len() < 3 {
1892        return None;
1893    }
1894
1895    let mut columns = Vec::new();
1896
1897    // Parse each segment between + signs
1898    for segment in segments.iter().skip(1).take(segments.len() - 2) {
1899        if segment.is_empty() {
1900            continue;
1901        }
1902
1903        // Segment must be dashes/equals with optional colons for alignment
1904        let seg_trimmed = *segment;
1905
1906        // Get the fill character (after removing colons)
1907        let inner = seg_trimmed.trim_start_matches(':').trim_end_matches(':');
1908
1909        // Must be all dashes or all equals
1910        if inner.is_empty() {
1911            return None;
1912        }
1913
1914        let first_char = inner.chars().next().unwrap();
1915        if first_char != '-' && first_char != '=' {
1916            return None;
1917        }
1918
1919        if !inner.chars().all(|c| c == first_char) {
1920            return None;
1921        }
1922
1923        let is_header_sep = first_char == '=';
1924
1925        columns.push(GridColumn {
1926            is_header_separator: is_header_sep,
1927            width: seg_trimmed.chars().count(),
1928        });
1929    }
1930
1931    if columns.is_empty() {
1932        None
1933    } else {
1934        Some(columns)
1935    }
1936}
1937
/// One column of a grid table, as described by a single segment of a border
/// line (`+-----+` / `+=====+`).
#[derive(Clone, Debug)]
struct GridColumn {
    /// `true` when the segment is drawn with `=` (header/footer border) rather
    /// than `-` (ordinary row border).
    is_header_separator: bool,
    /// Length of the border segment in characters, alignment colons included.
    width: usize,
}
1944
1945fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1946    let mut end_byte = start_byte;
1947    let mut display_cols = 0usize;
1948
1949    for (offset, ch) in line[start_byte..].char_indices() {
1950        if ch == '|' {
1951            let sep_byte = start_byte + offset;
1952            return (sep_byte, sep_byte + 1);
1953        }
1954        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1955        if display_cols + ch_width > width {
1956            break;
1957        }
1958        display_cols += ch_width;
1959        end_byte = start_byte + offset + ch.len_utf8();
1960        if display_cols >= width {
1961            break;
1962        }
1963    }
1964
1965    // If the width budget is exhausted before seeing a separator (for example
1966    // because of padding/layout drift), advance to the next literal separator
1967    // to keep row slicing aligned and preserve losslessness.
1968    let mut sep_byte = end_byte;
1969    while sep_byte < line.len() {
1970        let mut chars = line[sep_byte..].chars();
1971        let Some(ch) = chars.next() else {
1972            break;
1973        };
1974        if ch == '|' {
1975            return (sep_byte, sep_byte + 1);
1976        }
1977        sep_byte += ch.len_utf8();
1978    }
1979
1980    (end_byte, end_byte)
1981}
1982
/// Report whether `line` looks like a grid table content row.
///
/// After trimming surrounding whitespace the line must start with `|` and end
/// with either `|` (ordinary row) or `+` (spanning-style continuation line).
/// More than three bytes of leading whitespace disqualify the line.
fn is_grid_content_row(line: &str) -> bool {
    const MAX_INDENT: usize = 3;

    let indent = line.len() - line.trim_start().len();
    let row = line.trim();

    indent <= MAX_INDENT
        && row.starts_with('|')
        && matches!(row.chars().next_back(), Some('|' | '+'))
}
1996
1997/// Extract cell contents from a single grid table row line.
1998/// Returns a vector of cell contents (trimmed) based on column boundaries.
1999/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
2000fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
2001    let (line_content, _) = strip_newline(line);
2002    let line_trimmed = line_content.trim();
2003
2004    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
2005        return vec![String::new(); _columns.len()];
2006    }
2007
2008    let mut cells = Vec::with_capacity(_columns.len());
2009    let mut pos_byte = 1; // Skip leading pipe
2010
2011    for col in _columns {
2012        let col_idx = cells.len();
2013        if pos_byte >= line_trimmed.len() {
2014            cells.push(String::new());
2015            continue;
2016        }
2017
2018        let start_byte = pos_byte;
2019        let end_byte = if col_idx + 1 == _columns.len() {
2020            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2021        } else {
2022            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
2023            pos_byte = next_start;
2024            end
2025        };
2026        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
2027        if col_idx + 1 == _columns.len() {
2028            pos_byte = line_trimmed.len();
2029        }
2030    }
2031
2032    cells
2033}
2034
2035/// Emit a grid table row with inline-parsed cells.
2036/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
2037/// then continuation lines as raw TEXT for losslessness.
2038fn emit_grid_table_row(
2039    builder: &mut GreenNodeBuilder<'static>,
2040    window: &StrippedLines<'_, '_>,
2041    indices: &[usize],
2042    columns: &[GridColumn],
2043    row_kind: SyntaxKind,
2044    config: &ParserOptions,
2045) {
2046    if indices.is_empty() {
2047        return;
2048    }
2049
2050    builder.start_node(row_kind.into());
2051
2052    // Emit first line with TABLE_CELL nodes. The continuation-line container
2053    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
2054    // inside the row node before the cell text; the returned tail is the
2055    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
2056    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
2057    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2058    let cell_contents = extract_grid_cells_from_line(first_line, columns);
2059    let (line_without_newline, newline_str) = strip_newline(first_line);
2060    let trimmed = line_without_newline.trim();
2061    let expected_pipe_count = columns.len().saturating_add(1);
2062    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
2063
2064    // Rows that don't contain all expected column separators (spanning-style rows)
2065    // must be emitted verbatim for losslessness. The first line's prefix was
2066    // already consumed above; emit its tail and each continuation tail.
2067    if actual_pipe_count != expected_pipe_count {
2068        emit_line_tokens(builder, first_line);
2069        for &idx in &indices[1..] {
2070            let tail = window.emit_or_dispatch_tail(builder, idx);
2071            emit_line_tokens(builder, tail);
2072        }
2073        builder.finish_node();
2074        return;
2075    }
2076
2077    // Emit leading whitespace
2078    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
2079    if leading_ws_len > 0 {
2080        builder.token(
2081            SyntaxKind::WHITESPACE.into(),
2082            &line_without_newline[..leading_ws_len],
2083        );
2084    }
2085
2086    // Emit leading pipe
2087    if trimmed.starts_with('|') {
2088        builder.token(SyntaxKind::TEXT.into(), "|");
2089    }
2090
2091    // Emit each cell based on fixed column widths from separators
2092    let mut pos_byte = 1usize; // after leading pipe
2093    for (idx, cell_content) in cell_contents.iter().enumerate() {
2094        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
2095            let start_byte = pos_byte;
2096            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
2097                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2098            } else {
2099                let (end, next_start) =
2100                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
2101                pos_byte = next_start;
2102                end
2103            };
2104            let slice = &trimmed[start_byte..end_byte];
2105            if idx + 1 == columns.len() {
2106                pos_byte = trimmed.len();
2107            }
2108            slice
2109        } else {
2110            ""
2111        };
2112
2113        // Emit leading whitespace in cell
2114        let cell_trimmed = part.trim();
2115        let ws_start_len = part.len() - part.trim_start().len();
2116        if ws_start_len > 0 {
2117            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
2118        }
2119
2120        // Emit TABLE_CELL with inline parsing
2121        emit_table_cell(builder, cell_content, config);
2122
2123        // Emit trailing whitespace in cell
2124        let ws_end_start = ws_start_len + cell_trimmed.len();
2125        if ws_end_start < part.len() {
2126            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
2127        }
2128
2129        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
2130        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
2131            builder.token(SyntaxKind::TEXT.into(), "|");
2132        }
2133    }
2134
2135    // Emit trailing whitespace before newline
2136    let trailing_ws_start = leading_ws_len + trimmed.len();
2137    if trailing_ws_start < line_without_newline.len() {
2138        builder.token(
2139            SyntaxKind::WHITESPACE.into(),
2140            &line_without_newline[trailing_ws_start..],
2141        );
2142    }
2143
2144    // Emit newline
2145    if !newline_str.is_empty() {
2146        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2147    }
2148
2149    // Emit continuation lines as TEXT for losslessness, re-emitting each
2150    // line's container prefix first.
2151    for &idx in &indices[1..] {
2152        let tail = window.emit_or_dispatch_tail(builder, idx);
2153        emit_line_tokens(builder, tail);
2154    }
2155
2156    builder.finish_node();
2157}
2158
2159/// Try to parse a grid table starting at the given position.
2160/// Returns the number of lines consumed if successful.
2161pub(crate) fn try_parse_grid_table(
2162    window: &StrippedLines<'_, '_>,
2163    builder: &mut GreenNodeBuilder<'static>,
2164    config: &ParserOptions,
2165) -> Option<usize> {
2166    let lines = window.raw();
2167    let start_pos = window.pos();
2168    if start_pos >= lines.len() {
2169        return None;
2170    }
2171
2172    // Grid-border detection reads the stripped view through `UniformStripView`,
2173    // which strips *every* line — including the dispatch line — with the full
2174    // container strip. The strict column-0 check in `try_parse_grid_separator`
2175    // would otherwise reject a `+---+` border sitting at column 0 of a list
2176    // item's inner content if the dispatch line kept its list-indent. With an
2177    // empty prefix the stripped view equals the raw lines. Emission still goes
2178    // through `window.emit_or_dispatch_tail`, which preserves the indent bytes.
2179    // Scans stop at the first blank line, so only a bounded range is stripped.
2180    let view = UniformStripView(window);
2181
2182    // Cheap gate: a grid table's first line is a grid separator (`+---+`/`+===+`),
2183    // unless this is a caption-led table. Table detection runs at every block
2184    // start, so any per-line work for every prose/math paragraph was quadratic
2185    // on large documents. Run the gate on the same `view` the detection uses, so
2186    // it applies inside containers (blockquote/list) too — not just at top level.
2187    if try_parse_grid_separator(view.line(start_pos)).is_none()
2188        && !is_caption_followed_by_table(&view, start_pos)
2189    {
2190        return None;
2191    }
2192
2193    // Check if this line is a caption followed by a table
2194    // If so, the actual table starts after the caption and blank line
2195    let (actual_start, caption_before) = if is_caption_followed_by_table(&view, start_pos) {
2196        let (cap_start, cap_end) = caption_range_starting_at(&view, start_pos)?;
2197        let mut pos = cap_end;
2198        while pos < view.line_count() && view.line(pos).trim().is_empty() {
2199            pos += 1;
2200        }
2201        (pos, Some((cap_start, cap_end)))
2202    } else {
2203        (start_pos, None)
2204    };
2205
2206    if actual_start >= lines.len() {
2207        return None;
2208    }
2209
2210    // First line must be a grid separator
2211    let first_line = view.line(actual_start);
2212    let _columns = try_parse_grid_separator(first_line)?;
2213
2214    // Track table structure
2215    let mut end_pos = actual_start + 1;
2216    let mut found_header_sep = false;
2217    let mut in_footer = false;
2218
2219    // Scan table lines
2220    while end_pos < lines.len() {
2221        let line = view.line(end_pos);
2222
2223        // Check for blank line (table ends)
2224        if line.trim().is_empty() {
2225            break;
2226        }
2227
2228        // Check for separator line
2229        if let Some(sep_cols) = try_parse_grid_separator(line) {
2230            // Check if this is a header separator (=)
2231            if sep_cols.iter().any(|c| c.is_header_separator) {
2232                if !found_header_sep {
2233                    found_header_sep = true;
2234                } else if !in_footer {
2235                    // Second = separator starts footer
2236                    in_footer = true;
2237                }
2238            }
2239            end_pos += 1;
2240            continue;
2241        }
2242
2243        // Check for content row
2244        if is_grid_content_row(line) {
2245            end_pos += 1;
2246            continue;
2247        }
2248
2249        // Not a valid grid table line - table ends
2250        break;
2251    }
2252
2253    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2254    // Or just top + content rows that end with a separator
2255    if end_pos <= actual_start + 1 {
2256        return None;
2257    }
2258
2259    // Last consumed line should be a separator for a well-formed table
2260    // But we'll be lenient and accept tables ending with content rows
2261
2262    // Check for caption before table (only if we didn't already detected it)
2263    let caption_before = caption_before.or_else(|| find_caption_before_table(&view, actual_start));
2264
2265    // Check for caption after table
2266    let caption_after = if caption_before.is_some() {
2267        None
2268    } else {
2269        find_caption_after_table(&view, end_pos)
2270    };
2271
2272    // Build the grid table
2273    builder.start_node(SyntaxKind::GRID_TABLE.into());
2274
2275    // Emit caption before if present
2276    if let Some((cap_start, cap_end)) = caption_before {
2277        emit_table_caption(builder, window, cap_start, cap_end, config);
2278        // Emit blank line between caption and table if present
2279        emit_caption_blank_lines(builder, window, cap_end, actual_start);
2280    }
2281
2282    // Track whether we've passed the header separator
2283    let mut past_header_sep = false;
2284    let mut in_footer_section = false;
2285    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2286    // each line's container prefix can be re-emitted via the window.
2287    let mut current_row_indices: Vec<usize> = Vec::new();
2288    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2289
2290    // Emit table rows - accumulate multi-line cells
2291    for idx in actual_start..end_pos {
2292        let line = view.line(idx);
2293        if let Some(sep_cols) = try_parse_grid_separator(line) {
2294            // Separator line - emit any accumulated row first
2295            if !current_row_indices.is_empty() {
2296                emit_grid_table_row(
2297                    builder,
2298                    window,
2299                    &current_row_indices,
2300                    &sep_cols,
2301                    current_row_kind,
2302                    config,
2303                );
2304                current_row_indices.clear();
2305            }
2306
2307            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2308
2309            // Re-emit any continuation-line container prefix (`  > `) as
2310            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2311            if is_header_sep {
2312                if !past_header_sep {
2313                    // This is the header/body separator
2314                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2315                    let tail = window.emit_or_dispatch_tail(builder, idx);
2316                    emit_separator_tokens(builder, tail);
2317                    builder.finish_node();
2318                    past_header_sep = true;
2319                } else {
2320                    // Footer separator
2321                    if !in_footer_section {
2322                        in_footer_section = true;
2323                    }
2324                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2325                    let tail = window.emit_or_dispatch_tail(builder, idx);
2326                    emit_separator_tokens(builder, tail);
2327                    builder.finish_node();
2328                }
2329            } else {
2330                // Regular separator (row boundary)
2331                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2332                let tail = window.emit_or_dispatch_tail(builder, idx);
2333                emit_separator_tokens(builder, tail);
2334                builder.finish_node();
2335            }
2336        } else if is_grid_content_row(line) {
2337            // Content row - accumulate for multi-line cells
2338            current_row_kind = if !past_header_sep && found_header_sep {
2339                SyntaxKind::TABLE_HEADER
2340            } else if in_footer_section {
2341                SyntaxKind::TABLE_FOOTER
2342            } else {
2343                SyntaxKind::TABLE_ROW
2344            };
2345
2346            current_row_indices.push(idx);
2347        }
2348    }
2349
2350    // Emit any remaining accumulated row
2351    if !current_row_indices.is_empty() {
2352        // Use first separator's columns for cell boundaries
2353        if let Some(sep_cols) = try_parse_grid_separator(view.line(actual_start)) {
2354            emit_grid_table_row(
2355                builder,
2356                window,
2357                &current_row_indices,
2358                &sep_cols,
2359                current_row_kind,
2360                config,
2361            );
2362        }
2363    }
2364
2365    // Emit caption after if present
2366    if let Some((cap_start, cap_end)) = caption_after {
2367        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2368        emit_table_caption(builder, window, cap_start, cap_end, config);
2369    }
2370
2371    builder.finish_node(); // GRID_TABLE
2372
2373    // Calculate lines consumed
2374    let table_start = caption_before
2375        .map(|(start, _)| start)
2376        .unwrap_or(actual_start);
2377    let table_end = if let Some((_, cap_end)) = caption_after {
2378        cap_end
2379    } else {
2380        end_pos
2381    };
2382
2383    Some(table_end - table_start)
2384}
2385
2386#[cfg(test)]
2387mod grid_table_tests {
2388    use super::super::container_prefix::ContainerPrefix;
2389    use super::*;
2390
2391    #[test]
2392    fn test_grid_separator_detection() {
2393        assert!(try_parse_grid_separator("+---+---+").is_some());
2394        assert!(try_parse_grid_separator("+===+===+").is_some());
2395        assert!(try_parse_grid_separator("+---------------+---------------+").is_some());
2396        assert!(try_parse_grid_separator("+:---:+").is_some()); // center aligned
2397        assert!(try_parse_grid_separator("not a separator").is_none());
2398        assert!(try_parse_grid_separator("|---|---|").is_none()); // pipe table sep
2399
2400        // A grid border must sit at column 0 of its container content; an
2401        // indented border is not a grid table (matches pandoc, which parses
2402        // an indented `+---+` as a paragraph). Detection runs on the
2403        // container-stripped line, so any remaining leading space disqualifies.
2404        assert!(try_parse_grid_separator(" +---+---+").is_none());
2405        assert!(try_parse_grid_separator("  +---+---+").is_none());
2406        assert!(try_parse_grid_separator("   +===+===+").is_none());
2407    }
2408
2409    #[test]
2410    fn test_grid_header_separator() {
2411        let cols = try_parse_grid_separator("+===+===+").unwrap();
2412        assert!(cols.iter().all(|c| c.is_header_separator));
2413
2414        let cols2 = try_parse_grid_separator("+---+---+").unwrap();
2415        assert!(cols2.iter().all(|c| !c.is_header_separator));
2416    }
2417
2418    #[test]
2419    fn test_grid_content_row_detection() {
2420        assert!(is_grid_content_row("| content | content |"));
2421        assert!(is_grid_content_row("|  |  |"));
2422        assert!(is_grid_content_row("| content +------+"));
2423        assert!(!is_grid_content_row("+---+---+")); // separator, not content
2424        assert!(!is_grid_content_row("no pipes here"));
2425    }
2426
2427    #[test]
2428    fn test_basic_grid_table() {
2429        let input = vec![
2430            "+-------+-------+",
2431            "| Col1  | Col2  |",
2432            "+=======+=======+",
2433            "| A     | B     |",
2434            "+-------+-------+",
2435            "",
2436        ];
2437
2438        let mut builder = GreenNodeBuilder::new();
2439        let prefix = ContainerPrefix::default();
2440        let window = StrippedLines::new(&input, 0, &prefix);
2441        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2442
2443        assert!(result.is_some());
2444        assert_eq!(result.unwrap(), 5);
2445    }
2446
2447    #[test]
2448    fn test_grid_table_multirow() {
2449        let input = vec![
2450            "+---------------+---------------+",
2451            "| Fruit         | Advantages    |",
2452            "+===============+===============+",
2453            "| Bananas       | - wrapper     |",
2454            "|               | - color       |",
2455            "+---------------+---------------+",
2456            "| Oranges       | - scurvy      |",
2457            "|               | - tasty       |",
2458            "+---------------+---------------+",
2459            "",
2460        ];
2461
2462        let mut builder = GreenNodeBuilder::new();
2463        let prefix = ContainerPrefix::default();
2464        let window = StrippedLines::new(&input, 0, &prefix);
2465        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2466
2467        assert!(result.is_some());
2468        assert_eq!(result.unwrap(), 9);
2469    }
2470
2471    #[test]
2472    fn test_grid_table_with_footer() {
2473        let input = vec![
2474            "+-------+-------+",
2475            "| Fruit | Price |",
2476            "+=======+=======+",
2477            "| Apple | $1.00 |",
2478            "+-------+-------+",
2479            "| Pear  | $1.50 |",
2480            "+=======+=======+",
2481            "| Total | $2.50 |",
2482            "+=======+=======+",
2483            "",
2484        ];
2485
2486        let mut builder = GreenNodeBuilder::new();
2487        let prefix = ContainerPrefix::default();
2488        let window = StrippedLines::new(&input, 0, &prefix);
2489        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2490
2491        assert!(result.is_some());
2492        assert_eq!(result.unwrap(), 9);
2493    }
2494
2495    #[test]
2496    fn test_grid_table_headerless() {
2497        let input = vec![
2498            "+-------+-------+",
2499            "| A     | B     |",
2500            "+-------+-------+",
2501            "| C     | D     |",
2502            "+-------+-------+",
2503            "",
2504        ];
2505
2506        let mut builder = GreenNodeBuilder::new();
2507        let prefix = ContainerPrefix::default();
2508        let window = StrippedLines::new(&input, 0, &prefix);
2509        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2510
2511        assert!(result.is_some());
2512        assert_eq!(result.unwrap(), 5);
2513    }
2514
2515    #[test]
2516    fn test_grid_table_with_caption_before() {
2517        let input = vec![
2518            ": Sample table",
2519            "",
2520            "+-------+-------+",
2521            "| A     | B     |",
2522            "+=======+=======+",
2523            "| C     | D     |",
2524            "+-------+-------+",
2525            "",
2526        ];
2527
2528        let mut builder = GreenNodeBuilder::new();
2529        let prefix = ContainerPrefix::default();
2530        let window = StrippedLines::new(&input, 2, &prefix);
2531        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2532
2533        assert!(result.is_some());
2534        // Should include caption + blank + table
2535        assert_eq!(result.unwrap(), 7);
2536    }
2537
2538    #[test]
2539    fn test_grid_table_with_caption_after() {
2540        let input = vec![
2541            "+-------+-------+",
2542            "| A     | B     |",
2543            "+=======+=======+",
2544            "| C     | D     |",
2545            "+-------+-------+",
2546            "",
2547            "Table: My grid table",
2548            "",
2549        ];
2550
2551        let mut builder = GreenNodeBuilder::new();
2552        let prefix = ContainerPrefix::default();
2553        let window = StrippedLines::new(&input, 0, &prefix);
2554        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2555
2556        assert!(result.is_some());
2557        // table + blank + caption
2558        assert_eq!(result.unwrap(), 7);
2559    }
2560}
2561
2562// ============================================================================
2563// Multiline Table Parsing
2564// ============================================================================
2565
2566/// Check if a line is a multiline table separator (continuous dashes).
2567/// Multiline table separators span the full width and are all dashes.
2568/// Returns Some(columns) if valid, None otherwise.
2569fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2570    let trimmed = line.trim_start();
2571    let leading_spaces = line.len() - trimmed.len();
2572
2573    // Must have leading spaces <= 3 to not be a code block
2574    if leading_spaces > 3 {
2575        return None;
2576    }
2577
2578    let trimmed = trimmed.trim_end();
2579
2580    // Must be all dashes (continuous line of dashes)
2581    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2582        return None;
2583    }
2584
2585    // Must have at least 3 dashes
2586    if trimmed.len() < 3 {
2587        return None;
2588    }
2589
2590    // This is a full-width separator - columns will be determined by column separator lines
2591    Some(vec![Column {
2592        start: leading_spaces,
2593        end: leading_spaces + trimmed.len(),
2594        alignment: Alignment::Default,
2595    }])
2596}
2597
2598/// Check if a line is a column separator line for multiline tables.
2599/// Column separators have dashes with spaces between them to define columns.
2600fn is_column_separator(line: &str) -> bool {
2601    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2602}
2603
2604fn is_headerless_single_row_without_blank(
2605    lines: &(impl LineView + ?Sized),
2606    row_start: usize,
2607    row_end: usize,
2608    columns: &[Column],
2609) -> bool {
2610    if row_start >= row_end {
2611        return false;
2612    }
2613
2614    if row_end - row_start == 1 {
2615        return false;
2616    }
2617
2618    let Some(last_col) = columns.last() else {
2619        return false;
2620    };
2621
2622    for i in (row_start + 1)..row_end {
2623        let (content, _) = strip_newline(lines.line(i));
2624        let prefix_end = last_col.start.min(content.len());
2625        if !content[..prefix_end].trim().is_empty() {
2626            return false;
2627        }
2628    }
2629
2630    true
2631}
2632
2633/// Try to parse a multiline table starting at the given position.
2634/// Returns the number of lines consumed if successful.
2635pub(crate) fn try_parse_multiline_table(
2636    window: &StrippedLines<'_, '_>,
2637    builder: &mut GreenNodeBuilder<'static>,
2638    config: &ParserOptions,
2639) -> Option<usize> {
2640    let lines = window.raw();
2641    let start_pos = window.pos();
2642    if start_pos >= lines.len() {
2643        return None;
2644    }
2645
2646    // Cheap gate: a multiline table's first line is either a full-width dash
2647    // separator or a column separator. Table detection runs at every block
2648    // start, so any per-line work for every paragraph that can't begin a
2649    // multiline table was quadratic on large documents. Peek just the dispatch
2650    // line via `strip_at` and bail before any further scanning.
2651    let first_line = window.strip_at(start_pos);
2652
2653    // First line can be either:
2654    // 1. A full-width dash separator (for tables with headers)
2655    // 2. A column separator (for headerless tables)
2656    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
2657    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
2658    if !is_full_width_start && !is_column_sep_start {
2659        return None;
2660    }
2661
2662    // Detection scans read the container-prefix-stripped view lazily through the
2663    // window (see `LineView`) so a multiline table nested in `list → blockquote`
2664    // (e.g. `- > ----`) has its `  > ` prefix removed before the
2665    // separator/blank-row shape checks. The interior `>`-only row then strips to
2666    // `""` and registers as a blank row separator. With an empty prefix the
2667    // stripped view equals the raw lines. Scans stop at the first blank/closing
2668    // line, so only a bounded range is stripped. Emission re-emits the prefix
2669    // bytes as tokens via the window; captions read raw `lines`.
2670    let headerless_columns = if is_column_sep_start {
2671        try_parse_table_separator(window.line(start_pos))
2672    } else {
2673        None
2674    };
2675
2676    // Look ahead to find the structure
2677    let mut pos = start_pos + 1;
2678    let mut found_column_sep = is_column_sep_start; // Already found if headerless
2679    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
2680    let mut has_header = false;
2681    let mut found_blank_line = false;
2682    let mut found_closing_sep = false;
2683    let mut content_line_count = 0usize;
2684
2685    // Scan for header section and column separator
2686    while pos < lines.len() {
2687        let line = window.line(pos);
2688
2689        // Check for column separator (defines columns) - only if we started with full-width
2690        if is_full_width_start && is_column_separator(line) && !found_column_sep {
2691            found_column_sep = true;
2692            column_sep_pos = pos;
2693            has_header = pos > start_pos + 1; // Has header if there's content before column sep
2694            pos += 1;
2695            continue;
2696        }
2697
2698        // Check for blank line (row separator in body)
2699        if line.trim().is_empty() {
2700            found_blank_line = true;
2701            pos += 1;
2702            // Check if next line is a valid closing separator for this table shape.
2703            if pos < lines.len() {
2704                let next = window.line(pos);
2705                let is_valid_closer = if is_full_width_start {
2706                    try_parse_multiline_separator(next).is_some()
2707                } else {
2708                    is_column_separator(next)
2709                };
2710                if is_valid_closer {
2711                    found_closing_sep = true;
2712                    pos += 1; // Include the closing separator
2713                    break;
2714                }
2715            }
2716            continue;
2717        }
2718
2719        // Check for closing full-width dashes (only for full-width-start tables).
2720        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
2721            found_closing_sep = true;
2722            pos += 1;
2723            break;
2724        }
2725
2726        // Check for closing column separator (for headerless tables)
2727        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
2728            found_closing_sep = true;
2729            pos += 1;
2730            break;
2731        }
2732
2733        // Content row
2734        content_line_count += 1;
2735        pos += 1;
2736    }
2737
2738    // Must have found a column separator to be a valid multiline table
2739    if !found_column_sep {
2740        return None;
2741    }
2742
2743    // Must have had at least one blank line between rows (distinguishes from simple tables)
2744    if !found_blank_line {
2745        if !is_column_sep_start {
2746            return None;
2747        }
2748        let columns = headerless_columns.as_deref()?;
2749        if !is_headerless_single_row_without_blank(window, start_pos + 1, pos - 1, columns) {
2750            return None;
2751        }
2752    }
2753
2754    // Must have a closing separator
2755    if !found_closing_sep {
2756        return None;
2757    }
2758
2759    // Must have consumed more than just the opening separator
2760    if pos <= start_pos + 2 {
2761        return None;
2762    }
2763
2764    let end_pos = pos;
2765
2766    // Extract column boundaries from the separator line
2767    let columns = try_parse_table_separator(window.line(column_sep_pos))
2768        .expect("Column separator must be valid");
2769
2770    // Check for caption before table
2771    let caption_before = find_caption_before_table(window, start_pos);
2772
2773    // Check for caption after table
2774    let caption_after = if caption_before.is_some() {
2775        None
2776    } else {
2777        find_caption_after_table(window, end_pos)
2778    };
2779
2780    // Build the multiline table
2781    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());
2782
2783    // Emit caption before if present
2784    if let Some((cap_start, cap_end)) = caption_before {
2785        emit_table_caption(builder, window, cap_start, cap_end, config);
2786        // Emit blank line between caption and table if present
2787        emit_caption_blank_lines(builder, window, cap_end, start_pos);
2788    }
2789
2790    // Emit opening separator. The dispatch line's prefix was already consumed
2791    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
2792    // re-emits its `  > ` prefix via `emit_prefix_at`.
2793    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2794    let tail = window.emit_or_dispatch_tail(builder, start_pos);
2795    emit_separator_tokens(builder, tail);
2796    builder.finish_node();
2797
2798    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
2799    // up a multi-line row so each line's container prefix can be re-emitted via
2800    // the window.
2801    let mut in_header = has_header;
2802    let mut current_row_indices: Vec<usize> = Vec::new();
2803
2804    for i in (start_pos + 1)..end_pos {
2805        let line = window.line(i);
2806        // Column separator (header/body divider)
2807        if i == column_sep_pos {
2808            // Emit any accumulated header lines
2809            if !current_row_indices.is_empty() {
2810                emit_multiline_table_row(
2811                    builder,
2812                    window,
2813                    &current_row_indices,
2814                    &columns,
2815                    SyntaxKind::TABLE_HEADER,
2816                    config,
2817                );
2818                current_row_indices.clear();
2819            }
2820
2821            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2822            let tail = window.emit_or_dispatch_tail(builder, i);
2823            emit_separator_tokens(builder, tail);
2824            builder.finish_node();
2825            in_header = false;
2826            continue;
2827        }
2828
2829        // Closing separator (full-width or column separator at end)
2830        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
2831            // Emit any accumulated row lines
2832            if !current_row_indices.is_empty() {
2833                let kind = if in_header {
2834                    SyntaxKind::TABLE_HEADER
2835                } else {
2836                    SyntaxKind::TABLE_ROW
2837                };
2838                emit_multiline_table_row(
2839                    builder,
2840                    window,
2841                    &current_row_indices,
2842                    &columns,
2843                    kind,
2844                    config,
2845                );
2846                current_row_indices.clear();
2847            }
2848
2849            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2850            let tail = window.emit_or_dispatch_tail(builder, i);
2851            emit_separator_tokens(builder, tail);
2852            builder.finish_node();
2853            continue;
2854        }
2855
2856        // Blank line (row separator)
2857        if line.trim().is_empty() {
2858            // Emit accumulated row
2859            if !current_row_indices.is_empty() {
2860                let kind = if in_header {
2861                    SyntaxKind::TABLE_HEADER
2862                } else {
2863                    SyntaxKind::TABLE_ROW
2864                };
2865                emit_multiline_table_row(
2866                    builder,
2867                    window,
2868                    &current_row_indices,
2869                    &columns,
2870                    kind,
2871                    config,
2872                );
2873                current_row_indices.clear();
2874            }
2875
2876            // Re-emit the interior `>`-only separator row's container prefix
2877            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
2878            builder.start_node(SyntaxKind::BLANK_LINE.into());
2879            let tail = window.emit_or_dispatch_tail(builder, i);
2880            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
2881            builder.finish_node();
2882            continue;
2883        }
2884
2885        // Content line - accumulate for current row
2886        current_row_indices.push(i);
2887    }
2888
2889    // Emit any remaining accumulated lines
2890    if !current_row_indices.is_empty() {
2891        let kind = if in_header {
2892            SyntaxKind::TABLE_HEADER
2893        } else {
2894            SyntaxKind::TABLE_ROW
2895        };
2896        emit_multiline_table_row(
2897            builder,
2898            window,
2899            &current_row_indices,
2900            &columns,
2901            kind,
2902            config,
2903        );
2904    }
2905
2906    // Emit caption after if present
2907    if let Some((cap_start, cap_end)) = caption_after {
2908        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2909        emit_table_caption(builder, window, cap_start, cap_end, config);
2910    }
2911
2912    builder.finish_node(); // MultilineTable
2913
2914    // Calculate lines consumed
2915    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
2916    let table_end = if let Some((_, cap_end)) = caption_after {
2917        cap_end
2918    } else {
2919        end_pos
2920    };
2921
2922    Some(table_end - table_start)
2923}
2924
2925/// Extract cell contents from first line only (for CST emission).
2926/// Multi-line content will be in continuation TEXT tokens.
2927fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2928    let (line_content, _) = strip_newline(line);
2929    let mut cells = Vec::new();
2930
2931    for column in columns.iter() {
2932        let column_start = column_offset_to_byte_index(line_content, column.start);
2933        let column_end = column_offset_to_byte_index(line_content, column.end);
2934
2935        // Extract FULL text for this column (including whitespace)
2936        let cell_text = if column_start < column_end {
2937            &line_content[column_start..column_end]
2938        } else if column_start < line_content.len() {
2939            &line_content[column_start..]
2940        } else {
2941            ""
2942        };
2943
2944        cells.push(cell_text.to_string());
2945    }
2946
2947    cells
2948}
2949
2950/// Emit a multiline table row with inline parsing (Phase 7.1).
2951///
2952/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2953/// physical line re-emits its container prefix (`  > `) via the window before
2954/// its content. With an empty prefix the tails equal the raw lines, so emission
2955/// is byte-identical to the pre-window path.
2956fn emit_multiline_table_row(
2957    builder: &mut GreenNodeBuilder<'static>,
2958    window: &StrippedLines<'_, '_>,
2959    indices: &[usize],
2960    columns: &[Column],
2961    kind: SyntaxKind,
2962    config: &ParserOptions,
2963) {
2964    if indices.is_empty() {
2965        return;
2966    }
2967
2968    builder.start_node(kind.into());
2969
2970    // Emit the first line's container prefix as tokens, then slice cells from
2971    // the prefix-stripped tail (for CST losslessness, only the first physical
2972    // line is parsed into cells; continuation lines stay verbatim TEXT).
2973    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2974    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2975    let (trimmed, newline_str) = strip_newline(first_line);
2976    let mut current_pos = 0;
2977
2978    for (col_idx, column) in columns.iter().enumerate() {
2979        let cell_text = &cell_contents[col_idx];
2980        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2981        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2982
2983        // Emit whitespace before cell
2984        if current_pos < cell_start {
2985            builder.token(
2986                SyntaxKind::WHITESPACE.into(),
2987                &trimmed[current_pos..cell_start],
2988            );
2989        }
2990
2991        // Emit cell with inline parsing (first line content only)
2992        emit_table_cell(builder, cell_text, config);
2993
2994        current_pos = cell_end;
2995    }
2996
2997    // Emit trailing whitespace
2998    if current_pos < trimmed.len() {
2999        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
3000    }
3001
3002    // Emit newline
3003    if !newline_str.is_empty() {
3004        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
3005    }
3006
3007    // Emit continuation lines as TEXT to preserve exact line structure,
3008    // re-emitting each line's container prefix first.
3009    for &idx in &indices[1..] {
3010        let tail = window.emit_or_dispatch_tail(builder, idx);
3011        emit_line_tokens(builder, tail);
3012    }
3013
3014    builder.finish_node();
3015}
3016
3017#[cfg(test)]
3018mod multiline_table_tests {
3019    use super::super::container_prefix::ContainerPrefix;
3020    use super::*;
3021    use crate::syntax::SyntaxNode;
3022
3023    #[test]
3024    fn test_multiline_separator_detection() {
3025        assert!(
3026            try_parse_multiline_separator(
3027                "-------------------------------------------------------------"
3028            )
3029            .is_some()
3030        );
3031        assert!(try_parse_multiline_separator("---").is_some());
3032        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces
3033        assert!(try_parse_multiline_separator("--").is_none()); // too short
3034        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
3035        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
3036    }
3037
3038    #[test]
3039    fn test_basic_multiline_table() {
3040        let input = vec![
3041            "-------------------------------------------------------------",
3042            " Centered   Default           Right Left",
3043            "  Header    Aligned         Aligned Aligned",
3044            "----------- ------- --------------- -------------------------",
3045            "   First    row                12.0 Example of a row that",
3046            "                                    spans multiple lines.",
3047            "",
3048            "  Second    row                 5.0 Here's another one.",
3049            "-------------------------------------------------------------",
3050            "",
3051        ];
3052
3053        let mut builder = GreenNodeBuilder::new();
3054        let prefix = ContainerPrefix::default();
3055        let window = StrippedLines::new(&input, 0, &prefix);
3056        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3057
3058        assert!(result.is_some());
3059        assert_eq!(result.unwrap(), 9);
3060    }
3061
3062    #[test]
3063    fn test_multiline_table_headerless() {
3064        let input = vec![
3065            "----------- ------- --------------- -------------------------",
3066            "   First    row                12.0 Example of a row that",
3067            "                                    spans multiple lines.",
3068            "",
3069            "  Second    row                 5.0 Here's another one.",
3070            "----------- ------- --------------- -------------------------",
3071            "",
3072        ];
3073
3074        let mut builder = GreenNodeBuilder::new();
3075        let prefix = ContainerPrefix::default();
3076        let window = StrippedLines::new(&input, 0, &prefix);
3077        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3078
3079        assert!(result.is_some());
3080        assert_eq!(result.unwrap(), 6);
3081    }
3082
3083    #[test]
3084    fn test_multiline_table_headerless_single_line_is_not_multiline() {
3085        let input = vec![
3086            "-------     ------ ----------   -------",
3087            "     12     12        12             12",
3088            "-------     ------ ----------   -------",
3089            "",
3090            "Not part of table.",
3091            "",
3092        ];
3093
3094        let mut builder = GreenNodeBuilder::new();
3095        let prefix = ContainerPrefix::default();
3096        let window = StrippedLines::new(&input, 0, &prefix);
3097        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3098
3099        assert!(result.is_none());
3100    }
3101
3102    #[test]
3103    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
3104        let input = vec![
3105            "----------  ---------  -----------  ---------------------------",
3106            "   First    row               12.0  Example of a row that spans",
3107            "                                    multiple lines.",
3108            "----------  ---------  -----------  ---------------------------",
3109            "",
3110        ];
3111
3112        let mut builder = GreenNodeBuilder::new();
3113        let prefix = ContainerPrefix::default();
3114        let window = StrippedLines::new(&input, 0, &prefix);
3115        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3116
3117        assert!(result.is_some());
3118        assert_eq!(result.unwrap(), 4);
3119    }
3120
3121    #[test]
3122    fn test_multiline_table_with_caption() {
3123        let input = vec![
3124            "-------------------------------------------------------------",
3125            " Col1       Col2",
3126            "----------- -------",
3127            "   A        B",
3128            "",
3129            "-------------------------------------------------------------",
3130            "",
3131            "Table: Here's the caption.",
3132            "",
3133        ];
3134
3135        let mut builder = GreenNodeBuilder::new();
3136        let prefix = ContainerPrefix::default();
3137        let window = StrippedLines::new(&input, 0, &prefix);
3138        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3139
3140        assert!(result.is_some());
3141        // table (6 lines) + blank + caption
3142        assert_eq!(result.unwrap(), 8);
3143    }
3144
3145    #[test]
3146    fn test_multiline_table_single_row() {
3147        let input = vec![
3148            "---------------------------------------------",
3149            " Header1    Header2",
3150            "----------- -----------",
3151            "   Data     More data",
3152            "",
3153            "---------------------------------------------",
3154            "",
3155        ];
3156
3157        let mut builder = GreenNodeBuilder::new();
3158        let prefix = ContainerPrefix::default();
3159        let window = StrippedLines::new(&input, 0, &prefix);
3160        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3161
3162        assert!(result.is_some());
3163        assert_eq!(result.unwrap(), 6);
3164    }
3165
3166    #[test]
3167    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
3168        let input = vec![
3169            "- - - - -",
3170            "Third section with underscores.",
3171            "",
3172            "_____",
3173            "",
3174            "> Quote before rule",
3175            ">",
3176            "> ***",
3177            ">",
3178            "> Quote after rule",
3179            "",
3180            "Final paragraph.",
3181            "",
3182            "Here's a horizontal rule:",
3183            "",
3184            "---",
3185            "Text directly after the horizontal rule.",
3186            "",
3187        ];
3188
3189        let mut builder = GreenNodeBuilder::new();
3190        let prefix = ContainerPrefix::default();
3191        let window = StrippedLines::new(&input, 0, &prefix);
3192        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3193
3194        assert!(result.is_none());
3195    }
3196
3197    #[test]
3198    fn test_not_multiline_table() {
3199        // Simple table should not be parsed as multiline
3200        let input = vec![
3201            "  Right     Left     Center     Default",
3202            "-------     ------ ----------   -------",
3203            "     12     12        12            12",
3204            "",
3205        ];
3206
3207        let mut builder = GreenNodeBuilder::new();
3208        let prefix = ContainerPrefix::default();
3209        let window = StrippedLines::new(&input, 0, &prefix);
3210        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3211
3212        // Should not parse because first line isn't a full-width separator
3213        assert!(result.is_none());
3214    }
3215
3216    // Phase 7.1: Unit tests for emit_table_cell() helper
3217    #[test]
3218    fn test_emit_table_cell_plain_text() {
3219        let mut builder = GreenNodeBuilder::new();
3220        emit_table_cell(&mut builder, "Cell", &ParserOptions::default());
3221        let green = builder.finish();
3222        let node = SyntaxNode::new_root(green);
3223
3224        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3225        assert_eq!(node.text(), "Cell");
3226
3227        // Should have TEXT child
3228        let children: Vec<_> = node.children_with_tokens().collect();
3229        assert_eq!(children.len(), 1);
3230        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
3231    }
3232
3233    #[test]
3234    fn test_emit_table_cell_with_emphasis() {
3235        let mut builder = GreenNodeBuilder::new();
3236        emit_table_cell(&mut builder, "*italic*", &ParserOptions::default());
3237        let green = builder.finish();
3238        let node = SyntaxNode::new_root(green);
3239
3240        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3241        assert_eq!(node.text(), "*italic*");
3242
3243        // Should have EMPHASIS child
3244        let children: Vec<_> = node.children().collect();
3245        assert_eq!(children.len(), 1);
3246        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
3247    }
3248
3249    #[test]
3250    fn test_emit_table_cell_with_code() {
3251        let mut builder = GreenNodeBuilder::new();
3252        emit_table_cell(&mut builder, "`code`", &ParserOptions::default());
3253        let green = builder.finish();
3254        let node = SyntaxNode::new_root(green);
3255
3256        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3257        assert_eq!(node.text(), "`code`");
3258
3259        // Should have CODE_SPAN child
3260        let children: Vec<_> = node.children().collect();
3261        assert_eq!(children.len(), 1);
3262        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
3263    }
3264
3265    #[test]
3266    fn test_emit_table_cell_with_link() {
3267        let mut builder = GreenNodeBuilder::new();
3268        emit_table_cell(&mut builder, "[text](url)", &ParserOptions::default());
3269        let green = builder.finish();
3270        let node = SyntaxNode::new_root(green);
3271
3272        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3273        assert_eq!(node.text(), "[text](url)");
3274
3275        // Should have LINK child
3276        let children: Vec<_> = node.children().collect();
3277        assert_eq!(children.len(), 1);
3278        assert_eq!(children[0].kind(), SyntaxKind::LINK);
3279    }
3280
3281    #[test]
3282    fn test_emit_table_cell_with_strong() {
3283        let mut builder = GreenNodeBuilder::new();
3284        emit_table_cell(&mut builder, "**bold**", &ParserOptions::default());
3285        let green = builder.finish();
3286        let node = SyntaxNode::new_root(green);
3287
3288        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3289        assert_eq!(node.text(), "**bold**");
3290
3291        // Should have STRONG child
3292        let children: Vec<_> = node.children().collect();
3293        assert_eq!(children.len(), 1);
3294        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
3295    }
3296
3297    #[test]
3298    fn test_emit_table_cell_mixed_inline() {
3299        let mut builder = GreenNodeBuilder::new();
3300        emit_table_cell(
3301            &mut builder,
3302            "Text **bold** and `code`",
3303            &ParserOptions::default(),
3304        );
3305        let green = builder.finish();
3306        let node = SyntaxNode::new_root(green);
3307
3308        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3309        assert_eq!(node.text(), "Text **bold** and `code`");
3310
3311        // Should have multiple children: TEXT, STRONG, TEXT, CODE_SPAN
3312        let children: Vec<_> = node.children_with_tokens().collect();
3313        assert!(children.len() >= 4);
3314
3315        // Check some expected types
3316        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
3317        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
3318    }
3319
3320    #[test]
3321    fn test_emit_table_cell_empty() {
3322        let mut builder = GreenNodeBuilder::new();
3323        emit_table_cell(&mut builder, "", &ParserOptions::default());
3324        let green = builder.finish();
3325        let node = SyntaxNode::new_root(green);
3326
3327        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3328        assert_eq!(node.text(), "");
3329
3330        // Empty cell should have no children
3331        let children: Vec<_> = node.children_with_tokens().collect();
3332        assert_eq!(children.len(), 0);
3333    }
3334
3335    #[test]
3336    fn test_emit_table_cell_escaped_pipe() {
3337        let mut builder = GreenNodeBuilder::new();
3338        emit_table_cell(&mut builder, r"A \| B", &ParserOptions::default());
3339        let green = builder.finish();
3340        let node = SyntaxNode::new_root(green);
3341
3342        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3343        // The escaped pipe should be preserved
3344        assert_eq!(node.text(), r"A \| B");
3345    }
3346}