Skip to main content

panache_parser/parser/blocks/
tables.rs

1//! Simple table parsing for Pandoc's simple_tables extension.
2
3use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6use unicode_width::UnicodeWidthChar;
7
8use crate::parser::utils::attributes::{
9    emit_attribute_node, try_parse_trailing_attributes_with_pos,
10};
11use crate::parser::utils::helpers::{emit_line_tokens, emit_separator_tokens, strip_newline};
12use crate::parser::utils::inline_emission;
13
14use super::container_prefix::StrippedLines;
15
/// Index-addressed, read-only access to a buffer of lines, used by the table
/// detection scans. There are two kinds of backing:
///
/// - `[&str]` — raw, unstripped lines, for callers that scan the source
///   directly (the block dispatcher's caption lookahead, list and
///   definition-list probes).
/// - [`StrippedLines`] / [`UniformStripView`] — a view with the container
///   prefix removed, where each line is stripped lazily on access via
///   [`StrippedLines::strip_at`]. Detection scans stop at the first blank
///   line, so only a bounded range is ever stripped and the cost stays
///   O(scanned lines). The earlier `strip_all` approach collected
///   `0..raw.len()` on every call, which was quadratic when table detection
///   ran at every block start inside a large blockquote or list.
pub(crate) trait LineView {
    /// Total number of lines; every valid absolute index is below this.
    fn line_count(&self) -> usize;
    /// The line stored at absolute index `i`.
    fn line(&self, i: usize) -> &str;
}
36
37impl LineView for [&str] {
38    fn line(&self, i: usize) -> &str {
39        self[i]
40    }
41    fn line_count(&self) -> usize {
42        self.len()
43    }
44}
45
46impl<'a, 'p> LineView for StrippedLines<'a, 'p> {
47    fn line(&self, i: usize) -> &str {
48        self.strip_at(i)
49    }
50    fn line_count(&self) -> usize {
51        self.raw().len()
52    }
53}
54
55/// A [`LineView`] over a [`StrippedLines`] window that strips *every* line —
56/// including the dispatch line — with the full container strip rather than
57/// the emission-safe line-0 strip. Grid-border detection needs this: a
58/// `+---+` border sitting at column 0 of a list item's inner content must
59/// not retain the list indent, or the strict column-0 check in
60/// `try_parse_grid_separator` would reject it. Emission still goes through
61/// the window, which preserves the indent bytes. This reproduces the old
62/// grid path's `stripped[dispatch] = prefix.strip(...)` override, but
63/// lazily.
64pub(crate) struct UniformStripView<'s, 'a, 'p>(&'s StrippedLines<'a, 'p>);
65
66impl<'s, 'a, 'p> LineView for UniformStripView<'s, 'a, 'p> {
67    fn line(&self, i: usize) -> &str {
68        self.0.prefix().strip(self.0.raw()[i])
69    }
70    fn line_count(&self) -> usize {
71        self.0.raw().len()
72    }
73}
74
/// Horizontal alignment of a table column.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Alignment {
    /// Column content is left-aligned.
    Left,
    /// Column content is right-aligned.
    Right,
    /// Column content is centered.
    Center,
    /// No explicit alignment; also the initial value given to freshly
    /// extracted columns.
    Default,
}
82
83/// Column information extracted from the separator line.
84#[derive(Debug, Clone)]
85pub(crate) struct Column {
86    /// Start position (byte index) in the line
87    start: usize,
88    /// End position (byte index) in the line
89    end: usize,
90    /// Column alignment
91    alignment: Alignment,
92}
93
94/// Try to detect if a line is a table separator line.
95/// Returns Some(column positions) if it's a valid separator.
96pub(crate) fn try_parse_table_separator(line: &str) -> Option<Vec<Column>> {
97    let trimmed = line.trim_start();
98    // Strip trailing newline if present (CRLF or LF)
99    let (trimmed, newline_str) = strip_newline(trimmed);
100    let leading_spaces = line.len() - trimmed.len() - newline_str.len();
101
102    // Must have leading spaces <= 3 to not be a code block
103    if leading_spaces > 3 {
104        return None;
105    }
106
107    // Simple tables only use dashed separators.
108    if trimmed.contains('*') || trimmed.contains('_') {
109        return None;
110    }
111
112    // Must contain at least one dash
113    if !trimmed.contains('-') {
114        return None;
115    }
116
117    // A separator line consists of dashes and spaces
118    if !trimmed.chars().all(|c| c == '-' || c == ' ') {
119        return None;
120    }
121
122    // Must not be a horizontal rule.
123    let dash_groups: Vec<_> = trimmed.split(' ').filter(|s| !s.is_empty()).collect();
124    if dash_groups.len() <= 1 {
125        return None;
126    }
127
128    // Extract column positions from dash groups
129    let columns = extract_columns(trimmed, leading_spaces);
130
131    if columns.is_empty() {
132        return None;
133    }
134
135    Some(columns)
136}
137
138/// Extract column positions from a separator line.
139fn extract_columns(separator: &str, offset: usize) -> Vec<Column> {
140    let mut columns = Vec::new();
141    let mut in_dashes = false;
142    let mut col_start = 0;
143
144    for (i, ch) in separator.char_indices() {
145        match ch {
146            '-' if !in_dashes => {
147                col_start = i + offset;
148                in_dashes = true;
149            }
150            ' ' if in_dashes => {
151                columns.push(Column {
152                    start: col_start,
153                    end: i + offset,
154                    alignment: Alignment::Default, // Will be determined later
155                });
156                in_dashes = false;
157            }
158            _ => {}
159        }
160    }
161
162    // Handle last column
163    if in_dashes {
164        columns.push(Column {
165            start: col_start,
166            end: separator.len() + offset,
167            alignment: Alignment::Default,
168        });
169    }
170
171    columns
172}
173
/// Map a character column offset to the matching UTF-8 byte index in `line`.
///
/// Column boundaries of a simple table come from the ASCII separator line,
/// where character and byte offsets coincide. Data rows can contain multibyte
/// characters, so offsets have to be remapped before slicing. An offset at or
/// past the end of `line` maps to `line.len()`.
fn column_offset_to_byte_index(line: &str, offset: usize) -> usize {
    match line.char_indices().nth(offset) {
        Some((byte_idx, _)) => byte_idx,
        None => line.len(),
    }
}
184
/// Recognize a table-caption marker at the start of `line`.
///
/// Returns `Some((prefix_len, rest))`, where `prefix_len` is the byte length
/// of the leading whitespace plus the marker and `rest` is everything after
/// the marker. Accepted markers are `Table:`, `table:` and a bare `:`.
/// Lines indented by more than three bytes are rejected (they would be code
/// blocks).
fn try_parse_caption_prefix(line: &str) -> Option<(usize, &str)> {
    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    for keyword in ["Table:", "table:"] {
        if let Some(rest) = body.strip_prefix(keyword) {
            return Some((indent + keyword.len(), rest));
        }
    }

    // A bare `:` only counts when whitespace follows it (Pandoc-style), so
    // constructs such as the fenced-div fence `:::` are not taken for
    // captions.
    let rest = body.strip_prefix(':')?;
    rest.starts_with(char::is_whitespace)
        .then_some((indent + 1, rest))
}
213
214/// Check if a line could be the start of a table caption.
215fn is_table_caption_start(line: &str) -> bool {
216    try_parse_caption_prefix(line).is_some()
217}
218
/// Whether `line`, ignoring leading whitespace, starts with exactly one
/// colon: a `:` that is not immediately followed by a second `:`.
fn is_bare_colon_caption_start(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next() != Some(':')
}
223
224fn bare_colon_caption_looks_like_definition_code_block(line: &str) -> bool {
225    let Some((_, rest)) = try_parse_caption_prefix(line) else {
226        return false;
227    };
228    let trimmed = rest.trim_start();
229    trimmed.starts_with("```") || trimmed.starts_with("~~~")
230}
231
/// Whether `line` is a fenced-div fence: after optional leading whitespace,
/// three or more colons followed by either nothing or whitespace.
fn line_is_fenced_div_fence(line: &str) -> bool {
    let body = line.trim_start();
    let after_colons = body.trim_start_matches(':');
    // `:` is one byte, so the length difference is the number of colons.
    let colons = body.len() - after_colons.len();
    colons >= 3 && after_colons.chars().next().map_or(true, char::is_whitespace)
}
241
242fn is_valid_caption_start_before_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
243    if !is_table_caption_start(lines.line(pos)) {
244        return false;
245    }
246
247    if is_bare_colon_caption_start(lines.line(pos))
248        && bare_colon_caption_looks_like_definition_code_block(lines.line(pos))
249    {
250        return false;
251    }
252
253    // Avoid stealing definition-list definitions (":   ...") as table captions.
254    if is_bare_colon_caption_start(lines.line(pos))
255        && pos > 0
256        && !lines.line(pos - 1).trim().is_empty()
257        && !line_is_fenced_div_fence(lines.line(pos - 1))
258    {
259        return false;
260    }
261    true
262}
263
264/// Check if a line could be the start of a grid table.
265/// Grid tables start with a separator line like +---+---+ or +===+===+
266fn is_grid_table_start(line: &str) -> bool {
267    try_parse_grid_separator(line).is_some()
268}
269
270/// Check if a line could be the start of a multiline table.
271/// Multiline tables start with either:
272/// - A full-width dash separator (----)
273/// - A column separator with dashes and spaces (---- ---- ----)
274fn is_multiline_table_start(line: &str) -> bool {
275    try_parse_multiline_separator(line).is_some() || is_column_separator(line)
276}
277
278/// Check if there's a table following a potential caption at this position.
279/// This is used to avoid parsing a caption as a paragraph when it belongs to a table.
280pub(crate) fn is_caption_followed_by_table(
281    lines: &(impl LineView + ?Sized),
282    caption_pos: usize,
283) -> bool {
284    if caption_pos >= lines.line_count() {
285        return false;
286    }
287
288    // Caption must start with a caption prefix
289    if !is_valid_caption_start_before_table(lines, caption_pos) {
290        return false;
291    }
292
293    let mut pos = caption_pos + 1;
294
295    // Skip continuation lines of caption (non-blank lines).
296    // Stop at fenced-div fences (`:::`) — those close the enclosing div and
297    // must not be folded into the caption.
298    while pos < lines.line_count()
299        && !lines.line(pos).trim().is_empty()
300        && !line_is_fenced_div_fence(lines.line(pos))
301    {
302        // If we hit a table separator, we found a table
303        if try_parse_table_separator(lines.line(pos)).is_some() {
304            return true;
305        }
306        pos += 1;
307    }
308
309    // Skip one blank line
310    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
311        pos += 1;
312    }
313
314    // Check for a table grid at the next position.
315    table_grid_starts_at(lines, pos)
316}
317
318/// Cheap lookahead: does any table kind's grid begin at absolute line `pos`?
319///
320/// This is the lightweight twin of the block dispatcher's `first_kind_at`,
321/// which answers the same "is there a table here?" question by attempting a
322/// full parse of each kind in turn. We deliberately do **not** call that from
323/// the caption lookahead: caption detection runs at every block start, and a
324/// full per-kind parse there would reintroduce the O(n²) blowup the bounded
325/// separator probe exists to avoid. To keep the two predicates in agreement,
326/// this calls the same primitive separator detectors the real parsers gate on
327/// (`is_grid_table_start` → `try_parse_grid_separator`, `is_multiline_table_start`
328/// → `try_parse_multiline_separator`/`is_column_separator`,
329/// `try_parse_table_separator`, `try_parse_pipe_separator`).
330fn table_grid_starts_at(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
331    if pos >= lines.line_count() {
332        return false;
333    }
334    let line = lines.line(pos);
335
336    // Grid table start (`+---+---+` or `+===+===+`).
337    if is_grid_table_start(line) {
338        return true;
339    }
340
341    // Multiline table start (`----` or `---- ---- ----`).
342    if is_multiline_table_start(line) {
343        return true;
344    }
345
346    // Separator line (simple/pipe table, headerless).
347    if try_parse_table_separator(line).is_some() {
348        return true;
349    }
350
351    // Header line followed by a separator (simple/pipe table with header).
352    if pos + 1 < lines.line_count() && !line.trim().is_empty() {
353        let next_line = lines.line(pos + 1);
354        if try_parse_table_separator(next_line).is_some()
355            || try_parse_pipe_separator(next_line).is_some()
356        {
357            return true;
358        }
359    }
360
361    false
362}
363
364fn caption_range_starting_at(
365    lines: &(impl LineView + ?Sized),
366    start: usize,
367) -> Option<(usize, usize)> {
368    if start >= lines.line_count() || !is_table_caption_start(lines.line(start)) {
369        return None;
370    }
371    let mut end = start + 1;
372    while end < lines.line_count()
373        && !lines.line(end).trim().is_empty()
374        && !line_is_fenced_div_fence(lines.line(end))
375    {
376        end += 1;
377    }
378    Some((start, end))
379}
380
381/// Find caption before table (if any).
382/// Returns (caption_start, caption_end) positions, or None.
383fn find_caption_before_table(
384    lines: &(impl LineView + ?Sized),
385    table_start: usize,
386) -> Option<(usize, usize)> {
387    if table_start == 0 {
388        return None;
389    }
390
391    // Look backward for a caption
392    // Caption must be immediately before table (with possible blank line between)
393    let mut pos = table_start - 1;
394
395    // Skip one blank line if present
396    if lines.line(pos).trim().is_empty() {
397        if pos == 0 {
398            return None;
399        }
400        pos -= 1;
401    }
402
403    // Now pos points to the last non-blank line before the table
404    // This could be the last line of a multiline caption, or a single-line caption
405    let caption_end = pos + 1; // End is exclusive
406
407    // If this line is NOT a caption start, it might be a continuation line
408    // Scan backward through non-blank lines to find the caption start
409    if !is_valid_caption_start_before_table(lines, pos) {
410        // Not a caption start - check if there's a caption start above
411        let mut scan_pos = pos;
412        while scan_pos > 0 {
413            scan_pos -= 1;
414            let line = lines.line(scan_pos);
415
416            // If we hit a blank line or fenced-div fence, we've gone too far
417            if line.trim().is_empty() || line_is_fenced_div_fence(line) {
418                return None;
419            }
420
421            // If we find a caption start, this is the beginning of the multiline caption
422            if is_valid_caption_start_before_table(lines, scan_pos) {
423                if scan_pos > 0 && !lines.line(scan_pos - 1).trim().is_empty() {
424                    return None;
425                }
426                if previous_nonblank_looks_like_table(lines, scan_pos) {
427                    return None;
428                }
429                return Some((scan_pos, caption_end));
430            }
431        }
432        // Scanned to beginning without finding caption start
433        None
434    } else {
435        if pos > 0 && !lines.line(pos - 1).trim().is_empty() {
436            return None;
437        }
438        if previous_nonblank_looks_like_table(lines, pos) {
439            return None;
440        }
441        // This line is a caption start - return the range
442        Some((pos, caption_end))
443    }
444}
445
446fn previous_nonblank_looks_like_table(lines: &(impl LineView + ?Sized), pos: usize) -> bool {
447    if pos == 0 {
448        return false;
449    }
450    // Skip the blank gap directly above the caption candidate.
451    let mut i = pos;
452    while i > 0 && lines.line(i - 1).trim().is_empty() {
453        i -= 1;
454    }
455    // Scan the contiguous non-blank block above for any table shape. A
456    // simple/multiline table's dashed separator sits *above* its data rows
457    // (which are plain text and don't look like table syntax on their own), so
458    // we must walk the whole block, not just the nearest line, to recognize
459    // that this caption is the caption-after of a preceding table rather than a
460    // caption-before of the following one. Stop at the next blank line or a
461    // fenced-div fence.
462    while i > 0 {
463        i -= 1;
464        if lines.line(i).trim().is_empty() || line_is_fenced_div_fence(lines.line(i)) {
465            break;
466        }
467        if line_looks_like_table_syntax(lines.line(i).trim()) {
468            return true;
469        }
470    }
471    false
472}
473
474fn line_looks_like_table_syntax(line: &str) -> bool {
475    if line.starts_with('|') && line.matches('|').count() >= 2 {
476        return true;
477    }
478    if line.starts_with('+') && line.ends_with('+') && (line.contains('-') || line.contains('=')) {
479        return true;
480    }
481    try_parse_table_separator(line).is_some()
482        || try_parse_pipe_separator(line).is_some()
483        || try_parse_grid_separator(line).is_some()
484}
485
486/// Find caption after table (if any).
487/// Returns (caption_start, caption_end) positions, or None.
488fn find_caption_after_table(
489    lines: &(impl LineView + ?Sized),
490    table_end: usize,
491) -> Option<(usize, usize)> {
492    if table_end >= lines.line_count() {
493        return None;
494    }
495
496    let mut pos = table_end;
497
498    // Skip one blank line if present
499    if pos < lines.line_count() && lines.line(pos).trim().is_empty() {
500        pos += 1;
501    }
502
503    if pos >= lines.line_count() {
504        return None;
505    }
506
507    // Check if this line is a caption
508    if is_table_caption_start(lines.line(pos)) {
509        let caption_start = pos;
510        // Find end of caption (continues until blank line or fenced-div fence)
511        let mut caption_end = caption_start + 1;
512        while caption_end < lines.line_count()
513            && !lines.line(caption_end).trim().is_empty()
514            && !line_is_fenced_div_fence(lines.line(caption_end))
515        {
516            caption_end += 1;
517        }
518        Some((caption_start, caption_end))
519    } else {
520        None
521    }
522}
523
524/// Emit a table caption node.
525/// Emit caption text for a single line. If `lift_trailing_attrs` is set and
526/// the text ends with a balanced `{...}` block, lift it into a structural
527/// `ATTRIBUTE` node so `AttributeNode::cast` finds its id (matches Pandoc's
528/// `+caption_attributes` behavior — `: caption {#tbl-id}` gives the table
529/// the id).
530fn emit_caption_line_text(
531    builder: &mut GreenNodeBuilder<'static>,
532    text_with_newline: &str,
533    config: &ParserOptions,
534    lift_trailing_attrs: bool,
535) {
536    let (text, newline_str) = strip_newline(text_with_newline);
537
538    if lift_trailing_attrs
539        && !text.is_empty()
540        && let Some((_attrs, before_attrs, start_brace_pos)) =
541            try_parse_trailing_attributes_with_pos(text)
542    {
543        let trimmed_len = text.trim_end().len();
544        let space = &text[before_attrs.len()..start_brace_pos];
545        let raw_attrs = &text[start_brace_pos..trimmed_len];
546        let trailing_ws = &text[trimmed_len..];
547
548        if !before_attrs.is_empty() {
549            inline_emission::emit_inlines(builder, before_attrs, config, false);
550        }
551        if !space.is_empty() {
552            builder.token(SyntaxKind::WHITESPACE.into(), space);
553        }
554        emit_attribute_node(builder, raw_attrs);
555        if !trailing_ws.is_empty() {
556            builder.token(SyntaxKind::WHITESPACE.into(), trailing_ws);
557        }
558        if !newline_str.is_empty() {
559            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
560        }
561        return;
562    }
563
564    if !text.is_empty() {
565        inline_emission::emit_inlines(builder, text, config, false);
566    }
567    if !newline_str.is_empty() {
568        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
569    }
570}
571
572/// Emit the blank (container-only) lines in the absolute range `[from, to)` as
573/// `BLANK_LINE` nodes. Re-emits each line's container prefix as tokens via the
574/// window, so a `>`-only blank line between a caption and its table inside a
575/// blockquote round-trips losslessly. Mirrors the interior blank-row emitter in
576/// `try_parse_multiline_table`. An empty range emits nothing.
577fn emit_caption_blank_lines(
578    builder: &mut GreenNodeBuilder<'static>,
579    window: &StrippedLines<'_, '_>,
580    from: usize,
581    to: usize,
582) {
583    for abs in from..to {
584        // `window.line` is the container-stripped view, so a `>`-only line reads
585        // as blank.
586        if window.line(abs).trim().is_empty() {
587            builder.start_node(SyntaxKind::BLANK_LINE.into());
588            let tail = window.emit_or_dispatch_tail(builder, abs);
589            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
590            builder.finish_node();
591        }
592    }
593}
594
595fn emit_table_caption(
596    builder: &mut GreenNodeBuilder<'static>,
597    window: &StrippedLines<'_, '_>,
598    start: usize,
599    end: usize,
600    config: &ParserOptions,
601) {
602    builder.start_node(SyntaxKind::TABLE_CAPTION.into());
603
604    let last_idx = (end - start).saturating_sub(1);
605
606    for (i, abs) in (start..end).enumerate() {
607        let lift_attrs = i == last_idx;
608
609        // Re-emit this caption line's container prefix (`>`/whitespace) as
610        // tokens — except the dispatch line, whose prefix the core already
611        // emitted — and operate on the stripped `tail`, so the caption prefix
612        // (`Table:`/`:`) is recognized inside a blockquote or list rather than
613        // swallowed into the caption text (which doubled the marker and broke
614        // losslessness).
615        let tail = window.emit_or_dispatch_tail(builder, abs);
616
617        if i == 0 {
618            // First line - parse and emit prefix separately
619            let trimmed = tail.trim_start();
620            let leading_ws_len = tail.len() - trimmed.len();
621
622            // Emit leading whitespace if present
623            if leading_ws_len > 0 {
624                builder.token(SyntaxKind::WHITESPACE.into(), &tail[..leading_ws_len]);
625            }
626
627            // Check for caption prefix and emit separately
628            // Calculate where the prefix ends (after trimmed content)
629            let prefix_and_rest = if tail.ends_with('\n') {
630                &tail[leading_ws_len..tail.len() - 1] // Exclude newline
631            } else {
632                &tail[leading_ws_len..]
633            };
634
635            let (prefix_len, prefix_text) = if prefix_and_rest.starts_with("Table: ") {
636                (7, "Table: ")
637            } else if prefix_and_rest.starts_with("table: ") {
638                (7, "table: ")
639            } else if prefix_and_rest.starts_with(": ") {
640                (2, ": ")
641            } else if prefix_and_rest.starts_with(':') {
642                (1, ":")
643            } else {
644                (0, "")
645            };
646
647            if prefix_len > 0 {
648                builder.token(SyntaxKind::TABLE_CAPTION_PREFIX.into(), prefix_text);
649
650                // Emit rest of line after prefix
651                let rest_start = leading_ws_len + prefix_len;
652                if rest_start < tail.len() {
653                    emit_caption_line_text(builder, &tail[rest_start..], config, lift_attrs);
654                }
655            } else {
656                // No recognized prefix, emit whole trimmed line
657                emit_caption_line_text(builder, &tail[leading_ws_len..], config, lift_attrs);
658            }
659        } else {
660            // Continuation lines - emit with inline parsing (attrs only on last line).
661            emit_caption_line_text(builder, tail, config, lift_attrs);
662        }
663    }
664
665    builder.finish_node(); // TABLE_CAPTION
666}
667
668/// Emit a table cell with inline content parsing.
669/// This is the core helper for Phase 7.1 table inline parsing migration.
670fn emit_table_cell(
671    builder: &mut GreenNodeBuilder<'static>,
672    cell_text: &str,
673    config: &ParserOptions,
674) {
675    builder.start_node(SyntaxKind::TABLE_CELL.into());
676
677    // Parse inline content within the cell
678    if !cell_text.is_empty() {
679        inline_emission::emit_inlines(builder, cell_text, config, false);
680    }
681
682    builder.finish_node(); // TABLE_CELL
683}
684
685/// Determine column alignments based on separator and optional header.
686fn determine_alignments(columns: &mut [Column], separator_line: &str, header_line: Option<&str>) {
687    for col in columns.iter_mut() {
688        let sep_slice = &separator_line[col.start..col.end];
689
690        if let Some(header) = header_line {
691            let header_start = column_offset_to_byte_index(header, col.start);
692            let header_end = column_offset_to_byte_index(header, col.end);
693
694            // Extract header text for this column
695            let header_text = if header_start < header_end {
696                header[header_start..header_end].trim()
697            } else if header_start < header.len() {
698                header[header_start..].trim()
699            } else {
700                ""
701            };
702
703            if header_text.is_empty() {
704                col.alignment = Alignment::Default;
705                continue;
706            }
707
708            // Find where the header text starts and ends within the column
709            let header_in_col = &header[header_start..header_end];
710            let text_start = header_in_col.len() - header_in_col.trim_start().len();
711            let text_end = header_in_col.trim_end().len() + text_start;
712
713            // Check dash alignment relative to text
714            let dashes_start = 0; // Dashes start at beginning of sep_slice
715            let dashes_end = sep_slice.len();
716
717            let flush_left = dashes_start == text_start;
718            let flush_right = dashes_end == text_end;
719
720            col.alignment = match (flush_left, flush_right) {
721                (true, true) => Alignment::Default,
722                (true, false) => Alignment::Left,
723                (false, true) => Alignment::Right,
724                (false, false) => Alignment::Center,
725            };
726        } else {
727            // Without header, alignment based on first row (we'll handle this later)
728            col.alignment = Alignment::Default;
729        }
730    }
731}
732
733/// Try to parse a simple table starting at the given position.
734/// Returns the number of lines consumed if successful.
735pub(crate) fn try_parse_simple_table(
736    window: &StrippedLines<'_, '_>,
737    builder: &mut GreenNodeBuilder<'static>,
738    config: &ParserOptions,
739) -> Option<usize> {
740    let lines = window.raw();
741    let start_pos = window.pos();
742    log::trace!("try_parse_simple_table at line {}", start_pos + 1);
743
744    if start_pos >= lines.len() {
745        return None;
746    }
747
748    // Cheap gate before the O(buffer) `strip_all` below: a simple table's
749    // separator must sit on the dispatch line or the line just after it (see
750    // `find_separator_line`). Table detection runs at every block start, so
751    // stripping the whole line buffer for every prose/math paragraph that
752    // can't be a table was quadratic on large documents. Peek just those one
753    // or two lines via `strip_at` and bail before materializing the full view.
754    let gate_first = window.strip_at(start_pos);
755    let separator_here = try_parse_table_separator(gate_first).is_some();
756    let separator_next = !separator_here
757        && start_pos + 1 < lines.len()
758        && !gate_first.trim().is_empty()
759        && try_parse_table_separator(window.strip_at(start_pos + 1)).is_some();
760    if !separator_here && !separator_next {
761        return None;
762    }
763
764    // Detection scans read the container-prefix-stripped view lazily through
765    // the window (see `LineView`): a table nested in `list → blockquote`
766    // (e.g. `- >  a   b`) has its `  > ` prefix removed before the
767    // separator/column-shape checks. With an empty prefix the stripped view
768    // equals the raw lines. Scans stop at the first blank line, so only a
769    // bounded range is ever stripped. Emission re-emits the prefix bytes as
770    // tokens via the window; captions/blank lines still read raw `lines`.
771
772    // Look for a separator line
773    let separator_pos = find_separator_line(window, start_pos)?;
774    log::trace!("  found separator at line {}", separator_pos + 1);
775
776    let separator_line = window.line(separator_pos);
777    let mut columns = try_parse_table_separator(separator_line)?;
778
779    // Determine if there's a header (separator not at start)
780    let has_header = separator_pos > start_pos;
781    let header_line = if has_header {
782        Some(window.line(separator_pos - 1))
783    } else {
784        None
785    };
786
787    // Determine alignments
788    determine_alignments(&mut columns, separator_line, header_line);
789
790    // Find table end (blank line or end of input)
791    let end_pos = find_table_end(window, separator_pos + 1);
792
793    // Must have at least one data row (or it's just a separator)
794    let data_rows = end_pos - separator_pos - 1;
795
796    if data_rows == 0 {
797        return None;
798    }
799
800    // Check for caption before table
801    let caption_before = find_caption_before_table(window, start_pos);
802
803    // Check for caption after table
804    let caption_after = if caption_before.is_some() {
805        None
806    } else {
807        find_caption_after_table(window, end_pos)
808    };
809
810    // Build the table
811    builder.start_node(SyntaxKind::SIMPLE_TABLE.into());
812
813    // Emit caption before if present
814    if let Some((cap_start, cap_end)) = caption_before {
815        emit_table_caption(builder, window, cap_start, cap_end, config);
816        // Emit blank line between caption and table if present
817        emit_caption_blank_lines(builder, window, cap_end, start_pos);
818    }
819
820    // Emit header if present. On the dispatch line the core already emitted
821    // the container prefix; only continuation rows re-emit it (via the window
822    // inside `emit_table_row`).
823    if has_header {
824        emit_table_row(
825            builder,
826            window,
827            separator_pos - 1,
828            &columns,
829            SyntaxKind::TABLE_HEADER,
830            config,
831        );
832    }
833
834    // Emit separator, re-emitting any continuation-line container prefix
835    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
836    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
837    let separator_tail = window.emit_or_dispatch_tail(builder, separator_pos);
838    emit_separator_tokens(builder, separator_tail);
839    builder.finish_node();
840
841    // Emit data rows (always continuation lines)
842    for idx in (separator_pos + 1)..end_pos {
843        emit_table_row(
844            builder,
845            window,
846            idx,
847            &columns,
848            SyntaxKind::TABLE_ROW,
849            config,
850        );
851    }
852
853    // Emit caption after if present
854    if let Some((cap_start, cap_end)) = caption_after {
855        // Emit blank line before caption if needed
856        emit_caption_blank_lines(builder, window, end_pos, cap_start);
857        emit_table_caption(builder, window, cap_start, cap_end, config);
858    }
859
860    builder.finish_node(); // SimpleTable
861
862    // Calculate lines consumed (including captions)
863    let table_start = if let Some((cap_start, _)) = caption_before {
864        cap_start
865    } else if has_header {
866        separator_pos - 1
867    } else {
868        separator_pos
869    };
870
871    let table_end = if let Some((_, cap_end)) = caption_after {
872        cap_end
873    } else {
874        end_pos
875    };
876
877    let lines_consumed = table_end - table_start;
878
879    Some(lines_consumed)
880}
881
882/// Find the position of a separator line starting from pos.
883fn find_separator_line(lines: &(impl LineView + ?Sized), start_pos: usize) -> Option<usize> {
884    log::trace!("  find_separator_line from line {}", start_pos + 1);
885
886    // Check first line
887    log::trace!("    checking first line: {:?}", lines.line(start_pos));
888    if try_parse_table_separator(lines.line(start_pos)).is_some() {
889        log::trace!("    separator found at first line");
890        return Some(start_pos);
891    }
892
893    // Check second line (for table with header)
894    if start_pos + 1 < lines.line_count()
895        && !lines.line(start_pos).trim().is_empty()
896        && try_parse_table_separator(lines.line(start_pos + 1)).is_some()
897    {
898        return Some(start_pos + 1);
899    }
900    None
901}
902
903/// Find where the table ends (first blank line or end of input).
904fn find_table_end(lines: &(impl LineView + ?Sized), start_pos: usize) -> usize {
905    for i in start_pos..lines.line_count() {
906        if lines.line(i).trim().is_empty() {
907            return i;
908        }
909        // Check if this could be a closing separator
910        if try_parse_table_separator(lines.line(i)).is_some() {
911            // Check if next line is blank or end
912            if i + 1 >= lines.line_count() || lines.line(i + 1).trim().is_empty() {
913                return i + 1;
914            }
915        }
916    }
917    lines.line_count()
918}
919
920/// Emit a table row (header or data row) with inline-parsed cells for simple tables.
921/// Uses column boundaries from the separator line to extract cells.
922fn emit_table_row(
923    builder: &mut GreenNodeBuilder<'static>,
924    window: &StrippedLines<'_, '_>,
925    abs_idx: usize,
926    columns: &[Column],
927    row_kind: SyntaxKind,
928    config: &ParserOptions,
929) {
930    builder.start_node(row_kind.into());
931
932    // On continuation lines the leading `  > ` prefix is re-emitted as
933    // WHITESPACE/BLOCK_QUOTE_MARKER tokens inside the row node and the
934    // stripped tail returned; the dispatch line just strips its (already
935    // core-emitted) prefix. Empty prefix ⇒ the raw line.
936    let line = window.emit_or_dispatch_tail(builder, abs_idx);
937
938    let (line_without_newline, newline_str) = strip_newline(line);
939
940    // Emit leading whitespace if present
941    let trimmed = line_without_newline.trim_start();
942    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
943    if leading_ws_len > 0 {
944        builder.token(
945            SyntaxKind::WHITESPACE.into(),
946            &line_without_newline[..leading_ws_len],
947        );
948    }
949
950    // Track where we are in the line (for losslessness)
951    let mut current_pos = 0;
952
953    // Extract and emit cells based on column boundaries
954    for (i, col) in columns.iter().enumerate() {
955        // Calculate actual positions in the trimmed line (accounting for leading whitespace)
956        let cell_start = if col.start >= leading_ws_len {
957            column_offset_to_byte_index(trimmed, col.start - leading_ws_len)
958        } else {
959            0
960        };
961
962        // A column spans from its own start to the start of the next column
963        // (the inter-column gap belongs to the left column); the last column
964        // runs to end-of-line. Ending the slice at the dash-run end instead
965        // would split cell text that overruns a short dash run into the cell
966        // plus a bogus WHITESPACE token.
967        let end_offset = columns.get(i + 1).map_or(usize::MAX, |next| next.start);
968        let cell_end = if end_offset == usize::MAX {
969            trimmed.len()
970        } else if end_offset >= leading_ws_len {
971            column_offset_to_byte_index(trimmed, end_offset - leading_ws_len)
972        } else {
973            0
974        };
975
976        // Extract cell text from column bounds. When the column lies entirely
977        // before the trimmed content (col.end <= leading_ws_len) both bounds
978        // clamp to 0; treat that as an empty cell rather than re-emitting the
979        // whole row.
980        let cell_text = if cell_start < cell_end && cell_start < trimmed.len() {
981            &trimmed[cell_start..cell_end]
982        } else {
983            ""
984        };
985
986        let cell_content = cell_text.trim();
987        let cell_content_start = cell_text.len() - cell_text.trim_start().len();
988
989        // Emit any whitespace from current position to start of cell content
990        let content_abs_pos = (cell_start + cell_content_start).min(trimmed.len());
991        if current_pos < content_abs_pos {
992            builder.token(
993                SyntaxKind::WHITESPACE.into(),
994                &trimmed[current_pos..content_abs_pos],
995            );
996        }
997
998        // Emit cell with inline parsing
999        emit_table_cell(builder, cell_content, config);
1000
1001        // Update current position to end of cell content
1002        current_pos = content_abs_pos + cell_content.len();
1003    }
1004
1005    // Emit any remaining whitespace after last cell
1006    if current_pos < trimmed.len() {
1007        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
1008    }
1009
1010    // Emit newline if present
1011    if !newline_str.is_empty() {
1012        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1013    }
1014
1015    builder.finish_node();
1016}
1017
1018// ============================================================================
1019// Pipe Table Parsing
1020// ============================================================================
1021
1022/// Check if a line is a pipe table separator line.
1023/// Returns the column alignments if it's a valid separator.
1024fn try_parse_pipe_separator(line: &str) -> Option<Vec<Alignment>> {
1025    let trimmed = line.trim();
1026
1027    // Must contain at least one pipe
1028    if !trimmed.contains('|') && !trimmed.contains('+') {
1029        return None;
1030    }
1031
1032    // Split by pipes (or + for orgtbl variant)
1033    let cells: Vec<&str> = if trimmed.contains('+') {
1034        // Orgtbl variant: use + as separator in separator line
1035        trimmed.split(['|', '+']).collect()
1036    } else {
1037        trimmed.split('|').collect()
1038    };
1039
1040    let mut alignments = Vec::new();
1041
1042    for cell in cells {
1043        let cell = cell.trim();
1044
1045        // Skip empty cells (from leading/trailing pipes)
1046        if cell.is_empty() {
1047            continue;
1048        }
1049
1050        // Must be dashes with optional colons
1051        let starts_colon = cell.starts_with(':');
1052        let ends_colon = cell.ends_with(':');
1053
1054        // Remove colons to check if rest is all dashes
1055        let without_colons = cell.trim_start_matches(':').trim_end_matches(':');
1056
1057        // Must have at least one dash
1058        if without_colons.is_empty() || !without_colons.chars().all(|c| c == '-') {
1059            return None;
1060        }
1061
1062        // Determine alignment from colon positions
1063        let alignment = match (starts_colon, ends_colon) {
1064            (true, true) => Alignment::Center,
1065            (true, false) => Alignment::Left,
1066            (false, true) => Alignment::Right,
1067            (false, false) => Alignment::Default,
1068        };
1069
1070        alignments.push(alignment);
1071    }
1072
1073    // Must have at least one column
1074    if alignments.is_empty() {
1075        None
1076    } else {
1077        Some(alignments)
1078    }
1079}
1080
/// Split a pipe-table row into its trimmed cell texts.
///
/// - Surrounding whitespace is ignored and a leading `|` merely opens the row.
/// - A backslash escapes the character that follows it, so `\|` stays inside
///   the cell while the pipe in `\\|` (escaped backslash, then a real pipe)
///   ends the cell. This matches how `emit_pipe_table_row` locates unescaped
///   pipes, keeping the cell count consistent between detection and emission.
/// - Escape sequences are kept verbatim in the returned text.
/// - The text after the final pipe is only kept when it is non-empty (it is
///   empty when the row ends with a closing `|`).
fn parse_pipe_table_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    // The opening pipe does not start a cell of its own.
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells = Vec::new();
    let mut current_cell = String::new();
    let mut chars = body.chars();

    // `while let` (not `for`) because the escape arm pulls a second char.
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Keep the backslash and whatever it escapes together so the
                // escaped character can never act as a cell delimiter.
                current_cell.push(ch);
                if let Some(escaped) = chars.next() {
                    current_cell.push(escaped);
                }
            }
            '|' => {
                // End current cell, start new one
                cells.push(current_cell.trim().to_string());
                current_cell.clear();
            }
            _ => current_cell.push(ch),
        }
    }

    // Add last cell if it's not empty (it is empty if the row ended with a pipe)
    let last_cell = current_cell.trim();
    if !last_cell.is_empty() {
        cells.push(last_cell.to_string());
    }

    cells
}
1128
1129/// Emit a pipe table row with inline-parsed cells.
1130/// Preserves losslessness by emitting exact byte representation while parsing cell content inline.
1131fn emit_pipe_table_row(
1132    builder: &mut GreenNodeBuilder<'static>,
1133    window: &StrippedLines<'_, '_>,
1134    abs_idx: usize,
1135    row_kind: SyntaxKind,
1136    config: &ParserOptions,
1137) {
1138    builder.start_node(row_kind.into());
1139
1140    // On continuation lines (separator/data rows under a list+blockquote
1141    // container) the leading `  > ` prefix is not consumed by the core;
1142    // `emit_prefix_at` re-emits it as WHITESPACE/BLOCK_QUOTE_MARKER tokens
1143    // and returns the stripped tail. On the dispatch line the core already
1144    // emitted the prefix, so `dispatch_tail` just strips it from our view.
1145    // With an empty prefix (non-nested tables) both are no-ops returning
1146    // the raw line.
1147    let line = if abs_idx == window.dispatch_pos() {
1148        window.dispatch_tail()
1149    } else {
1150        window.emit_prefix_at(builder, abs_idx)
1151    };
1152
1153    let (line_without_newline, newline_str) = strip_newline(line);
1154    let trimmed = line_without_newline.trim();
1155
1156    // Parse cell boundaries
1157    let mut cell_starts = Vec::new();
1158    let mut cell_ends = Vec::new();
1159    let mut in_escape = false;
1160
1161    // Find all pipe positions (excluding escaped ones)
1162    let mut pipe_positions = Vec::new();
1163    for (i, ch) in trimmed.char_indices() {
1164        if in_escape {
1165            in_escape = false;
1166            continue;
1167        }
1168        if ch == '\\' {
1169            in_escape = true;
1170            continue;
1171        }
1172        if ch == '|' {
1173            pipe_positions.push(i);
1174        }
1175    }
1176
1177    // Determine cell boundaries based on pipe positions
1178    if pipe_positions.is_empty() {
1179        // No pipes - treat entire line as one cell (shouldn't happen for valid pipe tables)
1180        cell_starts.push(0);
1181        cell_ends.push(trimmed.len());
1182    } else {
1183        // Check if line starts with pipe
1184        let start_pipe = pipe_positions.first() == Some(&0);
1185        // Check if line ends with pipe
1186        let end_pipe = pipe_positions.last() == Some(&(trimmed.len() - 1));
1187
1188        if start_pipe {
1189            // Skip first pipe
1190            for i in 1..pipe_positions.len() {
1191                cell_starts.push(pipe_positions[i - 1] + 1);
1192                cell_ends.push(pipe_positions[i]);
1193            }
1194            // Add last cell if there's no trailing pipe
1195            if !end_pipe {
1196                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1197                cell_ends.push(trimmed.len());
1198            }
1199        } else {
1200            // No leading pipe
1201            cell_starts.push(0);
1202            cell_ends.push(pipe_positions[0]);
1203
1204            for i in 1..pipe_positions.len() {
1205                cell_starts.push(pipe_positions[i - 1] + 1);
1206                cell_ends.push(pipe_positions[i]);
1207            }
1208
1209            // Add last cell if there's no trailing pipe
1210            if !end_pipe {
1211                cell_starts.push(*pipe_positions.last().unwrap() + 1);
1212                cell_ends.push(trimmed.len());
1213            }
1214        }
1215    }
1216
1217    // Emit leading whitespace if present (before trim)
1218    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
1219    if leading_ws_len > 0 {
1220        builder.token(
1221            SyntaxKind::WHITESPACE.into(),
1222            &line_without_newline[..leading_ws_len],
1223        );
1224    }
1225
1226    // Emit cells with pipes
1227    for (idx, (start, end)) in cell_starts.iter().zip(cell_ends.iter()).enumerate() {
1228        // Emit pipe before cell (except for first cell if no leading pipe)
1229        if *start > 0 {
1230            builder.token(SyntaxKind::TEXT.into(), "|");
1231        } else if idx == 0 && trimmed.starts_with('|') {
1232            // Leading pipe
1233            builder.token(SyntaxKind::TEXT.into(), "|");
1234        }
1235
1236        // Get cell content with its whitespace
1237        let cell_with_ws = &trimmed[*start..*end];
1238        let cell_content = cell_with_ws.trim();
1239
1240        // Emit leading whitespace within cell
1241        let cell_leading_ws = &cell_with_ws[..cell_with_ws.len() - cell_with_ws.trim_start().len()];
1242        if !cell_leading_ws.is_empty() {
1243            builder.token(SyntaxKind::WHITESPACE.into(), cell_leading_ws);
1244        }
1245
1246        // Emit cell with inline parsing
1247        emit_table_cell(builder, cell_content, config);
1248
1249        // Emit trailing whitespace within cell
1250        let cell_trailing_ws_start = cell_leading_ws.len() + cell_content.len();
1251        if cell_trailing_ws_start < cell_with_ws.len() {
1252            builder.token(
1253                SyntaxKind::WHITESPACE.into(),
1254                &cell_with_ws[cell_trailing_ws_start..],
1255            );
1256        }
1257    }
1258
1259    // Emit trailing pipe if present
1260    if !pipe_positions.is_empty() && trimmed.ends_with('|') {
1261        builder.token(SyntaxKind::TEXT.into(), "|");
1262    }
1263
1264    // Emit trailing whitespace after trim (before newline)
1265    let trailing_ws_start = leading_ws_len + trimmed.len();
1266    if trailing_ws_start < line_without_newline.len() {
1267        builder.token(
1268            SyntaxKind::WHITESPACE.into(),
1269            &line_without_newline[trailing_ws_start..],
1270        );
1271    }
1272
1273    // Emit newline
1274    if !newline_str.is_empty() {
1275        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1276    }
1277
1278    builder.finish_node();
1279}
1280
1281/// Try to parse a pipe table starting at the given position.
1282/// Returns the number of lines consumed if successful.
1283pub(crate) fn try_parse_pipe_table(
1284    window: &StrippedLines<'_, '_>,
1285    builder: &mut GreenNodeBuilder<'static>,
1286    config: &ParserOptions,
1287) -> Option<usize> {
1288    let lines = window.raw();
1289    let start_pos = window.pos();
1290    if start_pos + 1 >= lines.len() {
1291        return None;
1292    }
1293
1294    // Cheap gate: a pipe table's first line must contain a `|` (it is either
1295    // the header or, headerless, the delimiter row), unless this is a
1296    // caption-led table. Table detection runs at every block start, so doing
1297    // any per-line work for every prose/math paragraph was quadratic on large
1298    // documents. Peek the dispatch line and run the (bounded) caption probe on
1299    // the same stripped `window` the detection below uses, so the gate applies
1300    // inside containers (blockquote/list) too — not just at top level.
1301    if !window.strip_at(start_pos).contains('|') && !is_caption_followed_by_table(window, start_pos)
1302    {
1303        return None;
1304    }
1305
1306    // Detection scans read the container-prefix-stripped view lazily through
1307    // the window (see `LineView`), so a table nested in `list → blockquote`
1308    // (e.g. `- > | a | b |`) has its `  > ` prefix removed before the
1309    // separator/cell shape checks. The dispatch line uses the emission-safe
1310    // line-0 strip (its prefix was consumed by the core); every other line
1311    // gets the full continuation strip. Scans stop at the first blank line, so
1312    // only a bounded range is stripped. Emission still reads raw `lines` so the
1313    // prefix bytes can be re-emitted as tokens.
1314
1315    // Check if this line is a caption followed by a table
1316    // If so, the actual table starts after the caption and blank line
1317    let (actual_start, caption_before) = if is_caption_followed_by_table(window, start_pos) {
1318        let (cap_start, cap_end) = caption_range_starting_at(window, start_pos)?;
1319        let mut pos = cap_end;
1320        while pos < window.line_count() && window.line(pos).trim().is_empty() {
1321            pos += 1;
1322        }
1323        (pos, Some((cap_start, cap_end)))
1324    } else {
1325        (start_pos, None)
1326    };
1327
1328    if actual_start + 1 >= lines.len() {
1329        return None;
1330    }
1331
1332    // First line should have pipes (potential header)
1333    if !window.line(actual_start).contains('|') {
1334        return None;
1335    }
1336
1337    // Second line should be separator
1338    let alignments = try_parse_pipe_separator(window.line(actual_start + 1))?;
1339
1340    // Parse header cells
1341    let header_cells = parse_pipe_table_row(window.line(actual_start));
1342
1343    // Number of columns should match (approximately - be lenient)
1344    if header_cells.len() != alignments.len() && !header_cells.is_empty() {
1345        // Only fail if very different
1346        if header_cells.len() < alignments.len() / 2 || header_cells.len() > alignments.len() * 2 {
1347            return None;
1348        }
1349    }
1350
1351    // Find table end (first blank line or end of input)
1352    let mut end_pos = actual_start + 2;
1353    while end_pos < window.line_count() {
1354        let line = window.line(end_pos);
1355        if line.trim().is_empty() {
1356            break;
1357        }
1358        // Row should have pipes
1359        if !line.contains('|') {
1360            break;
1361        }
1362        end_pos += 1;
1363    }
1364
1365    // Must have at least one data row
1366    if end_pos <= actual_start + 2 {
1367        return None;
1368    }
1369
1370    // Check for caption before table (only if we didn't already detect it)
1371    let caption_before = caption_before.or_else(|| find_caption_before_table(window, actual_start));
1372
1373    // Check for caption after table
1374    let caption_after = if caption_before.is_some() {
1375        None
1376    } else {
1377        find_caption_after_table(window, end_pos)
1378    };
1379
1380    // Build the pipe table
1381    builder.start_node(SyntaxKind::PIPE_TABLE.into());
1382
1383    // Emit caption before if present
1384    if let Some((cap_start, cap_end)) = caption_before {
1385        emit_table_caption(builder, window, cap_start, cap_end, config);
1386        // Emit blank line between caption and table if present
1387        emit_caption_blank_lines(builder, window, cap_end, actual_start);
1388    }
1389
1390    // Emit header row with inline-parsed cells. On the dispatch line the
1391    // core already emitted the container prefix; only when the header is a
1392    // continuation line (e.g. it follows a caption-before line) do we emit
1393    // the prefix here.
1394    emit_pipe_table_row(
1395        builder,
1396        window,
1397        actual_start,
1398        SyntaxKind::TABLE_HEADER,
1399        config,
1400    );
1401
1402    // Emit separator, re-emitting any continuation-line container prefix
1403    // (`  > `) as WHITESPACE/BLOCK_QUOTE_MARKER tokens before the row text.
1404    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
1405    let sep_idx = actual_start + 1;
1406    let separator_tail = if sep_idx == window.dispatch_pos() {
1407        window.dispatch_tail()
1408    } else {
1409        window.emit_prefix_at(builder, sep_idx)
1410    };
1411    emit_separator_tokens(builder, separator_tail);
1412    builder.finish_node();
1413
1414    // Emit data rows with inline-parsed cells (always continuation lines)
1415    for idx in (actual_start + 2)..end_pos {
1416        emit_pipe_table_row(builder, window, idx, SyntaxKind::TABLE_ROW, config);
1417    }
1418
1419    // Emit caption after if present
1420    if let Some((cap_start, cap_end)) = caption_after {
1421        // Emit blank line before caption if needed
1422        emit_caption_blank_lines(builder, window, end_pos, cap_start);
1423        emit_table_caption(builder, window, cap_start, cap_end, config);
1424    }
1425
1426    builder.finish_node(); // PipeTable
1427
1428    // Calculate lines consumed
1429    let table_start = caption_before
1430        .map(|(start, _)| start)
1431        .unwrap_or(actual_start);
1432    let table_end = if let Some((_, cap_end)) = caption_after {
1433        cap_end
1434    } else {
1435        end_pos
1436    };
1437
1438    Some(table_end - table_start)
1439}
1440
1441#[cfg(test)]
1442mod tests {
1443    use super::super::container_prefix::ContainerPrefix;
1444    use super::*;
1445
1446    #[test]
1447    fn test_separator_detection() {
1448        assert!(try_parse_table_separator("------- ------ ----------   -------").is_some());
1449        assert!(try_parse_table_separator("  ---  ---  ---").is_some());
1450        assert!(try_parse_table_separator("-------").is_none()); // horizontal rule
1451        assert!(try_parse_table_separator("--- --- ---").is_some()); // table separator
1452    }
1453
1454    #[test]
1455    fn test_column_extraction() {
1456        let line = "-------     ------ ----------   -------";
1457        let columns = extract_columns(line, 0);
1458        assert_eq!(columns.len(), 4);
1459    }
1460
1461    #[test]
1462    fn test_simple_table_with_header() {
1463        let input = vec![
1464            "  Right     Left     Center     Default",
1465            "-------     ------ ----------   -------",
1466            "     12     12        12            12",
1467            "    123     123       123          123",
1468            "",
1469        ];
1470
1471        let mut builder = GreenNodeBuilder::new();
1472        let prefix = ContainerPrefix::default();
1473        let window = StrippedLines::new(&input, 0, &prefix);
1474        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1475
1476        assert!(result.is_some());
1477        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1478    }
1479
1480    #[test]
1481    fn test_headerless_table() {
1482        let input = vec![
1483            "-------     ------ ----------   -------",
1484            "     12     12        12            12",
1485            "    123     123       123          123",
1486            "",
1487        ];
1488
1489        let mut builder = GreenNodeBuilder::new();
1490        let prefix = ContainerPrefix::default();
1491        let window = StrippedLines::new(&input, 0, &prefix);
1492        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1493
1494        assert!(result.is_some());
1495        assert_eq!(result.unwrap(), 3); // sep + 2 rows
1496    }
1497
1498    #[test]
1499    fn test_caption_prefix_detection() {
1500        assert!(try_parse_caption_prefix("Table: My caption").is_some());
1501        assert!(try_parse_caption_prefix("table: My caption").is_some());
1502        assert!(try_parse_caption_prefix(": My caption").is_some());
1503        assert!(try_parse_caption_prefix(":").is_none()); // Just colon, no content
1504        assert!(try_parse_caption_prefix("Not a caption").is_none());
1505    }
1506
1507    #[test]
1508    fn table_grid_starts_at_matches_each_kind() {
1509        // Positives — one shape per table kind the real parsers accept.
1510        assert!(table_grid_starts_at(&["+---+---+"][..], 0)); // grid
1511        assert!(table_grid_starts_at(&["----------- -------"][..], 0)); // multiline
1512        assert!(table_grid_starts_at(&["--- --- ---"][..], 0)); // simple, headerless
1513        assert!(table_grid_starts_at(&["A | B", "| --- | --- |"][..], 0)); // pipe, header + sep
1514        assert!(table_grid_starts_at(&["A    B", "--- ---"][..], 0)); // simple, header + sep
1515        // A lone dash run is a multiline full-width separator under Pandoc (not a
1516        // thematic break), so the lookahead intentionally accepts it; the full
1517        // parser then rejects it if no rows follow.
1518        assert!(table_grid_starts_at(&["-------"][..], 0));
1519
1520        // Negatives — shapes that must not read as a table start.
1521        assert!(!table_grid_starts_at(&["just some prose"][..], 0));
1522        assert!(!table_grid_starts_at(&["# Heading"][..], 0));
1523        assert!(!table_grid_starts_at(&["```", "code", "```"][..], 0)); // code fence
1524        assert!(!table_grid_starts_at(&["only one line"][..], 1)); // out of range
1525    }
1526
1527    /// The cheap caption lookahead must agree with what the full parser does:
1528    /// when it says a table follows the caption, a table node really forms; when
1529    /// it says no table follows, none does. This guards against the lookahead
1530    /// (`table_grid_starts_at`) drifting from the real per-kind parsers.
1531    #[test]
1532    fn caption_lookahead_agrees_with_real_parse() {
1533        let with_table = ": Cap\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
1534        let lines: Vec<&str> = with_table.lines().collect();
1535        assert!(is_caption_followed_by_table(&lines[..], 0));
1536        assert!(format!("{:#?}", crate::parse(with_table, None)).contains("PIPE_TABLE"));
1537
1538        let no_table = ": Cap\n\nplain paragraph\n";
1539        let lines: Vec<&str> = no_table.lines().collect();
1540        assert!(!is_caption_followed_by_table(&lines[..], 0));
1541        assert!(!format!("{:#?}", crate::parse(no_table, None)).contains("TABLE"));
1542    }
1543
1544    /// Pandoc parses `table` before `orderedList` (but `bulletList` before
1545    /// `table`) in its `block` choice. So an ordered marker whose line is the
1546    /// header of a valid pipe table is NOT a list: the whole construct is a
1547    /// top-level table absorbing the marker as the first header cell. Bullets
1548    /// and a lone ordered marker (no delimiter) stay lists. Verified against
1549    /// pandoc 3.9 (`-f markdown -t native`).
1550    #[test]
1551    fn ordered_marker_on_pipe_table_line_is_top_level_table() {
1552        let input = "1. | a | b |\n   | - | - |\n   | 1 | 2 |\n";
1553        let tree = crate::parse(input, None);
1554        assert!(
1555            tree.descendants()
1556                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1557            "ordered marker + pipe table on the marker line should be a top-level table"
1558        );
1559        assert!(
1560            !tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1561            "it must not nest under a list"
1562        );
1563        // Lossless: the marker and the overflow cell survive in the CST.
1564        let dump = format!("{tree:#?}");
1565        assert!(
1566            dump.contains("1."),
1567            "marker text preserved as a header cell"
1568        );
1569        assert!(dump.contains('b'), "overflow cell `b` preserved (lossless)");
1570    }
1571
1572    #[test]
1573    fn lone_ordered_marker_pipe_line_is_a_list() {
1574        // No delimiter row → pandoc's `table` fails, `orderedList` catches it.
1575        let input = "1. | a | b |\n";
1576        let tree = crate::parse(input, None);
1577        assert!(
1578            tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1579            "a lone ordered marker line stays a list"
1580        );
1581        assert!(
1582            !tree
1583                .descendants()
1584                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1585            "no table without a delimiter row"
1586        );
1587    }
1588
1589    #[test]
1590    fn bullet_marker_on_pipe_table_line_stays_a_nested_table() {
1591        // Bullets already match pandoc (`BulletList -> Table`): regression guard.
1592        let input = "- | a | b |\n  | - | - |\n  | 1 | 2 |\n";
1593        let tree = crate::parse(input, None);
1594        assert!(
1595            tree.descendants().any(|n| n.kind() == SyntaxKind::LIST),
1596            "bullet marker keeps the list"
1597        );
1598        assert!(
1599            tree.descendants()
1600                .any(|n| n.kind() == SyntaxKind::PIPE_TABLE),
1601            "with the table nested inside the list item"
1602        );
1603    }
1604
1605    #[test]
1606    fn bare_colon_fenced_code_is_not_table_caption() {
1607        let input = "Term\n: ```\n  code\n  ```\n";
1608        let tree = crate::parse(input, None);
1609
1610        assert!(
1611            tree.descendants()
1612                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1613            "should parse as definition list"
1614        );
1615        assert!(
1616            tree.descendants()
1617                .any(|node| node.kind() == SyntaxKind::CODE_BLOCK),
1618            "definition should preserve fenced code block"
1619        );
1620        assert!(
1621            !tree
1622                .descendants()
1623                .any(|node| node.kind() == SyntaxKind::TABLE_CAPTION),
1624            "fenced code definition should not be parsed as table caption"
1625        );
1626    }
1627
1628    #[test]
1629    fn bare_colon_caption_after_div_opening_is_table_caption() {
1630        let input = "::: {#tbl:panel layout.nrow=\"1\"}\n  : My Caption {#tbl:foo-1}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\n  : My Caption2 {#tbl:foo-2}\n\n  | Col1 | Col2 | Col3 |\n  | ---- | ---- | ---- |\n  | A    | B    | C    |\n  | E    | F    | G    |\n  | A    | G    | G    |\n\nCaption\n:::\n";
1631        let tree = crate::parse(input, None);
1632
1633        let caption_count = tree
1634            .descendants()
1635            .filter(|node| node.kind() == SyntaxKind::TABLE_CAPTION)
1636            .count();
1637        assert_eq!(
1638            caption_count, 2,
1639            "expected both captions to attach to tables"
1640        );
1641        assert!(
1642            !tree
1643                .descendants()
1644                .any(|node| node.kind() == SyntaxKind::DEFINITION_LIST),
1645            "caption lines in this fenced div table layout should not parse as definition list"
1646        );
1647    }
1648
1649    #[test]
1650    fn test_table_with_caption_after() {
1651        let input = vec![
1652            "  Right     Left     Center     Default",
1653            "-------     ------ ----------   -------",
1654            "     12     12        12            12",
1655            "    123     123       123          123",
1656            "",
1657            "Table: Demonstration of simple table syntax.",
1658            "",
1659        ];
1660
1661        let mut builder = GreenNodeBuilder::new();
1662        let prefix = ContainerPrefix::default();
1663        let window = StrippedLines::new(&input, 0, &prefix);
1664        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1665
1666        assert!(result.is_some());
1667        // Should consume: header + sep + 2 rows + blank + caption
1668        assert_eq!(result.unwrap(), 6);
1669    }
1670
1671    #[test]
1672    fn test_table_with_caption_before() {
1673        let input = vec![
1674            "Table: Demonstration of simple table syntax.",
1675            "",
1676            "  Right     Left     Center     Default",
1677            "-------     ------ ----------   -------",
1678            "     12     12        12            12",
1679            "    123     123       123          123",
1680            "",
1681        ];
1682
1683        let mut builder = GreenNodeBuilder::new();
1684        let prefix = ContainerPrefix::default();
1685        let window = StrippedLines::new(&input, 2, &prefix);
1686        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1687
1688        assert!(result.is_some());
1689        // Should consume: caption + blank + header + sep + 2 rows
1690        assert_eq!(result.unwrap(), 6);
1691    }
1692
1693    #[test]
1694    fn test_caption_with_colon_prefix() {
1695        let input = vec![
1696            "  Right     Left",
1697            "-------     ------",
1698            "     12     12",
1699            "",
1700            ": Short caption",
1701            "",
1702        ];
1703
1704        let mut builder = GreenNodeBuilder::new();
1705        let prefix = ContainerPrefix::default();
1706        let window = StrippedLines::new(&input, 0, &prefix);
1707        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1708
1709        assert!(result.is_some());
1710        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1711    }
1712
1713    #[test]
1714    fn test_multiline_caption() {
1715        let input = vec![
1716            "  Right     Left",
1717            "-------     ------",
1718            "     12     12",
1719            "",
1720            "Table: This is a longer caption",
1721            "that spans multiple lines.",
1722            "",
1723        ];
1724
1725        let mut builder = GreenNodeBuilder::new();
1726        let prefix = ContainerPrefix::default();
1727        let window = StrippedLines::new(&input, 0, &prefix);
1728        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1729
1730        assert!(result.is_some());
1731        // Should consume through end of multi-line caption
1732        assert_eq!(result.unwrap(), 6);
1733    }
1734
1735    #[test]
1736    fn test_simple_table_with_multibyte_cell_content() {
1737        let input = vec![
1738            "Name            Hex code     Hue     C, M, Y, K (%)   R, G, B (0-255)   R, G, B (%)",
1739            "--------------  ------------ ------- ---------------- ----------------- ------------",
1740            "        orange       #E69F00     41° 0, 50, 100, 0    230, 159, 0       90, 60, 0",
1741            "      sky blue       #56B4E9    202° 80, 0, 0, 0      86, 180, 233      35, 70, 90",
1742            "",
1743        ];
1744
1745        let mut builder = GreenNodeBuilder::new();
1746        let prefix = ContainerPrefix::default();
1747        let window = StrippedLines::new(&input, 0, &prefix);
1748        let result = try_parse_simple_table(&window, &mut builder, &ParserOptions::default());
1749
1750        assert!(result.is_some());
1751        assert_eq!(result.unwrap(), 4);
1752    }
1753
1754    // Pipe table tests
1755    #[test]
1756    fn test_pipe_separator_detection() {
1757        assert!(try_parse_pipe_separator("|------:|:-----|---------|:------:|").is_some());
1758        assert!(try_parse_pipe_separator("|---|---|").is_some());
1759        assert!(try_parse_pipe_separator("-----|-----:").is_some()); // No leading pipe
1760        assert!(try_parse_pipe_separator("|-----+-------|").is_some()); // Orgtbl variant
1761        assert!(try_parse_pipe_separator("not a separator").is_none());
1762    }
1763
1764    #[test]
1765    fn test_pipe_alignments() {
1766        let aligns = try_parse_pipe_separator("|------:|:-----|---------|:------:|").unwrap();
1767        assert_eq!(aligns.len(), 4);
1768        assert_eq!(aligns[0], Alignment::Right);
1769        assert_eq!(aligns[1], Alignment::Left);
1770        assert_eq!(aligns[2], Alignment::Default);
1771        assert_eq!(aligns[3], Alignment::Center);
1772    }
1773
1774    #[test]
1775    fn test_parse_pipe_table_row() {
1776        let cells = parse_pipe_table_row("| Right | Left | Center |");
1777        assert_eq!(cells.len(), 3);
1778        assert_eq!(cells[0], "Right");
1779        assert_eq!(cells[1], "Left");
1780        assert_eq!(cells[2], "Center");
1781
1782        // Without leading/trailing pipes
1783        let cells2 = parse_pipe_table_row("Right | Left | Center");
1784        assert_eq!(cells2.len(), 3);
1785    }
1786
1787    #[test]
1788    fn test_basic_pipe_table() {
1789        let input = vec![
1790            "",
1791            "| Right | Left | Center |",
1792            "|------:|:-----|:------:|",
1793            "|   12  |  12  |   12   |",
1794            "|  123  |  123 |  123   |",
1795            "",
1796        ];
1797
1798        let mut builder = GreenNodeBuilder::new();
1799        let prefix = ContainerPrefix::default();
1800        let window = StrippedLines::new(&input, 1, &prefix);
1801        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1802
1803        assert!(result.is_some());
1804        assert_eq!(result.unwrap(), 4); // header + sep + 2 rows
1805    }
1806
1807    #[test]
1808    fn test_pipe_table_no_edge_pipes() {
1809        let input = vec![
1810            "",
1811            "fruit| price",
1812            "-----|-----:",
1813            "apple|2.05",
1814            "pear|1.37",
1815            "",
1816        ];
1817
1818        let mut builder = GreenNodeBuilder::new();
1819        let prefix = ContainerPrefix::default();
1820        let window = StrippedLines::new(&input, 1, &prefix);
1821        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1822
1823        assert!(result.is_some());
1824        assert_eq!(result.unwrap(), 4);
1825    }
1826
1827    #[test]
1828    fn test_pipe_table_with_caption() {
1829        let input = vec![
1830            "",
1831            "| Col1 | Col2 |",
1832            "|------|------|",
1833            "| A    | B    |",
1834            "",
1835            "Table: My pipe table",
1836            "",
1837        ];
1838
1839        let mut builder = GreenNodeBuilder::new();
1840        let prefix = ContainerPrefix::default();
1841        let window = StrippedLines::new(&input, 1, &prefix);
1842        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1843
1844        assert!(result.is_some());
1845        assert_eq!(result.unwrap(), 5); // header + sep + row + blank + caption
1846    }
1847
1848    #[test]
1849    fn test_pipe_table_with_multiline_caption_before() {
1850        let input = vec![
1851            ": (#tab:base) base R quoting",
1852            "functions",
1853            "",
1854            "| C | D |",
1855            "|---|---|",
1856            "| 3 | 4 |",
1857            "",
1858        ];
1859
1860        let mut builder = GreenNodeBuilder::new();
1861        let prefix = ContainerPrefix::default();
1862        let window = StrippedLines::new(&input, 0, &prefix);
1863        let result = try_parse_pipe_table(&window, &mut builder, &ParserOptions::default());
1864
1865        assert!(result.is_some());
1866        // caption(2) + blank(1) + header + sep + row
1867        assert_eq!(result.unwrap(), 6);
1868    }
1869}
1870
1871// ============================================================================
1872// Grid Table Parsing
1873// ============================================================================
1874
1875/// Check if a line is a grid table row separator (starts with +, contains -, ends with +).
1876/// Returns Some(vec of column info) if valid, None otherwise.
1877fn try_parse_grid_separator(line: &str) -> Option<Vec<GridColumn>> {
1878    let trimmed = line.trim_start();
1879    let leading_spaces = line.len() - trimmed.len();
1880
1881    // A grid border must begin at column 0 of its container content. Detection
1882    // runs on the container-prefix-stripped line (see `try_parse_grid_table`),
1883    // so any remaining leading whitespace means the border is indented relative
1884    // to its container -- pandoc parses that as a paragraph, not a grid table.
1885    if leading_spaces > 0 {
1886        return None;
1887    }
1888
1889    // Must start with + and end with +
1890    if !trimmed.starts_with('+') || !trimmed.trim_end().ends_with('+') {
1891        return None;
1892    }
1893
1894    // Split by + to get column segments
1895    let trimmed = trimmed.trim_end();
1896    let segments: Vec<&str> = trimmed.split('+').collect();
1897
1898    // Need at least 3 parts: empty before first +, column(s), empty after last +
1899    if segments.len() < 3 {
1900        return None;
1901    }
1902
1903    let mut columns = Vec::new();
1904
1905    // Parse each segment between + signs
1906    for segment in segments.iter().skip(1).take(segments.len() - 2) {
1907        if segment.is_empty() {
1908            continue;
1909        }
1910
1911        // Segment must be dashes/equals with optional colons for alignment
1912        let seg_trimmed = *segment;
1913
1914        // Get the fill character (after removing colons)
1915        let inner = seg_trimmed.trim_start_matches(':').trim_end_matches(':');
1916
1917        // Must be all dashes or all equals
1918        if inner.is_empty() {
1919            return None;
1920        }
1921
1922        let first_char = inner.chars().next().unwrap();
1923        if first_char != '-' && first_char != '=' {
1924            return None;
1925        }
1926
1927        if !inner.chars().all(|c| c == first_char) {
1928            return None;
1929        }
1930
1931        let is_header_sep = first_char == '=';
1932
1933        columns.push(GridColumn {
1934            is_header_separator: is_header_sep,
1935            width: seg_trimmed.chars().count(),
1936        });
1937    }
1938
1939    if columns.is_empty() {
1940        None
1941    } else {
1942        Some(columns)
1943    }
1944}
1945
/// Column information for grid tables.
///
/// `try_parse_grid_separator` produces one value per non-empty segment found
/// between two `+` corners of a border line.
#[derive(Debug, Clone)]
struct GridColumn {
    /// `true` when the segment's fill character is `=` rather than `-`.
    is_header_separator: bool,
    /// Number of characters in the segment between its two `+` corners,
    /// including any alignment colons.
    width: usize,
}
1952
1953fn slice_cell_by_display_width(line: &str, start_byte: usize, width: usize) -> (usize, usize) {
1954    let mut end_byte = start_byte;
1955    let mut display_cols = 0usize;
1956
1957    for (offset, ch) in line[start_byte..].char_indices() {
1958        if ch == '|' {
1959            let sep_byte = start_byte + offset;
1960            return (sep_byte, sep_byte + 1);
1961        }
1962        let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
1963        if display_cols + ch_width > width {
1964            break;
1965        }
1966        display_cols += ch_width;
1967        end_byte = start_byte + offset + ch.len_utf8();
1968        if display_cols >= width {
1969            break;
1970        }
1971    }
1972
1973    // If the width budget is exhausted before seeing a separator (for example
1974    // because of padding/layout drift), advance to the next literal separator
1975    // to keep row slicing aligned and preserve losslessness.
1976    let mut sep_byte = end_byte;
1977    while sep_byte < line.len() {
1978        let mut chars = line[sep_byte..].chars();
1979        let Some(ch) = chars.next() else {
1980            break;
1981        };
1982        if ch == '|' {
1983            return (sep_byte, sep_byte + 1);
1984        }
1985        sep_byte += ch.len_utf8();
1986    }
1987
1988    (end_byte, end_byte)
1989}
1990
/// Decide whether `line` looks like a grid table content row.
///
/// A content row has at most three bytes of leading whitespace, starts with
/// `|`, and (ignoring trailing whitespace) ends with either `|` (ordinary row)
/// or `+` (spanning-style continuation line).
fn is_grid_content_row(line: &str) -> bool {
    let indent_bytes = line.len() - line.trim_start().len();
    let row = line.trim();

    indent_bytes <= 3
        && row.starts_with('|')
        && matches!(row.chars().next_back(), Some('|' | '+'))
}
2004
2005/// Extract cell contents from a single grid table row line.
2006/// Returns a vector of cell contents (trimmed) based on column boundaries.
2007/// Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
2008fn extract_grid_cells_from_line(line: &str, _columns: &[GridColumn]) -> Vec<String> {
2009    let (line_content, _) = strip_newline(line);
2010    let line_trimmed = line_content.trim();
2011
2012    if !line_trimmed.starts_with('|') || !line_trimmed.ends_with('|') {
2013        return vec![String::new(); _columns.len()];
2014    }
2015
2016    let mut cells = Vec::with_capacity(_columns.len());
2017    let mut pos_byte = 1; // Skip leading pipe
2018
2019    for col in _columns {
2020        let col_idx = cells.len();
2021        if pos_byte >= line_trimmed.len() {
2022            cells.push(String::new());
2023            continue;
2024        }
2025
2026        let start_byte = pos_byte;
2027        let end_byte = if col_idx + 1 == _columns.len() {
2028            line_trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2029        } else {
2030            let (end, next_start) = slice_cell_by_display_width(line_trimmed, pos_byte, col.width);
2031            pos_byte = next_start;
2032            end
2033        };
2034        cells.push(line_trimmed[start_byte..end_byte].trim().to_string());
2035        if col_idx + 1 == _columns.len() {
2036            pos_byte = line_trimmed.len();
2037        }
2038    }
2039
2040    cells
2041}
2042
2043/// Emit a grid table row with inline-parsed cells.
2044/// Handles multi-line rows by emitting first line with TABLE_CELL nodes,
2045/// then continuation lines as raw TEXT for losslessness.
2046fn emit_grid_table_row(
2047    builder: &mut GreenNodeBuilder<'static>,
2048    window: &StrippedLines<'_, '_>,
2049    indices: &[usize],
2050    columns: &[GridColumn],
2051    row_kind: SyntaxKind,
2052    config: &ParserOptions,
2053) {
2054    if indices.is_empty() {
2055        return;
2056    }
2057
2058    builder.start_node(row_kind.into());
2059
2060    // Emit first line with TABLE_CELL nodes. The continuation-line container
2061    // prefix (`  > `) is re-emitted as WHITESPACE/BLOCK_QUOTE_MARKER tokens
2062    // inside the row node before the cell text; the returned tail is the
2063    // prefix-stripped line we slice cells from (empty prefix ⇒ raw line).
2064    // Grid table rows look like: "| Cell 1 | Cell 2 | Cell 3 |"
2065    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2066    let cell_contents = extract_grid_cells_from_line(first_line, columns);
2067    let (line_without_newline, newline_str) = strip_newline(first_line);
2068    let trimmed = line_without_newline.trim();
2069    let expected_pipe_count = columns.len().saturating_add(1);
2070    let actual_pipe_count = trimmed.chars().filter(|&c| c == '|').count();
2071
2072    // Rows that don't contain all expected column separators (spanning-style rows)
2073    // must be emitted verbatim for losslessness. The first line's prefix was
2074    // already consumed above; emit its tail and each continuation tail.
2075    if actual_pipe_count != expected_pipe_count {
2076        emit_line_tokens(builder, first_line);
2077        for &idx in &indices[1..] {
2078            let tail = window.emit_or_dispatch_tail(builder, idx);
2079            emit_line_tokens(builder, tail);
2080        }
2081        builder.finish_node();
2082        return;
2083    }
2084
2085    // Emit leading whitespace
2086    let leading_ws_len = line_without_newline.len() - line_without_newline.trim_start().len();
2087    if leading_ws_len > 0 {
2088        builder.token(
2089            SyntaxKind::WHITESPACE.into(),
2090            &line_without_newline[..leading_ws_len],
2091        );
2092    }
2093
2094    // Emit leading pipe
2095    if trimmed.starts_with('|') {
2096        builder.token(SyntaxKind::TEXT.into(), "|");
2097    }
2098
2099    // Emit each cell based on fixed column widths from separators
2100    let mut pos_byte = 1usize; // after leading pipe
2101    for (idx, cell_content) in cell_contents.iter().enumerate() {
2102        let part = if idx < columns.len() && pos_byte <= trimmed.len() {
2103            let start_byte = pos_byte;
2104            let end_byte = if idx + 1 == columns.len() && !trimmed.is_empty() {
2105                trimmed.len().saturating_sub(1) // consume to trailing pipe for last column
2106            } else {
2107                let (end, next_start) =
2108                    slice_cell_by_display_width(trimmed, pos_byte, columns[idx].width);
2109                pos_byte = next_start;
2110                end
2111            };
2112            let slice = &trimmed[start_byte..end_byte];
2113            if idx + 1 == columns.len() {
2114                pos_byte = trimmed.len();
2115            }
2116            slice
2117        } else {
2118            ""
2119        };
2120
2121        // Emit leading whitespace in cell
2122        let cell_trimmed = part.trim();
2123        let ws_start_len = part.len() - part.trim_start().len();
2124        if ws_start_len > 0 {
2125            builder.token(SyntaxKind::WHITESPACE.into(), &part[..ws_start_len]);
2126        }
2127
2128        // Emit TABLE_CELL with inline parsing
2129        emit_table_cell(builder, cell_content, config);
2130
2131        // Emit trailing whitespace in cell
2132        let ws_end_start = ws_start_len + cell_trimmed.len();
2133        if ws_end_start < part.len() {
2134            builder.token(SyntaxKind::WHITESPACE.into(), &part[ws_end_start..]);
2135        }
2136
2137        // Emit pipe separator (unless this is the last cell and line doesn't end with |)
2138        if idx < cell_contents.len() - 1 || trimmed.ends_with('|') {
2139            builder.token(SyntaxKind::TEXT.into(), "|");
2140        }
2141    }
2142
2143    // Emit trailing whitespace before newline
2144    let trailing_ws_start = leading_ws_len + trimmed.len();
2145    if trailing_ws_start < line_without_newline.len() {
2146        builder.token(
2147            SyntaxKind::WHITESPACE.into(),
2148            &line_without_newline[trailing_ws_start..],
2149        );
2150    }
2151
2152    // Emit newline
2153    if !newline_str.is_empty() {
2154        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
2155    }
2156
2157    // Emit continuation lines as TEXT for losslessness, re-emitting each
2158    // line's container prefix first.
2159    for &idx in &indices[1..] {
2160        let tail = window.emit_or_dispatch_tail(builder, idx);
2161        emit_line_tokens(builder, tail);
2162    }
2163
2164    builder.finish_node();
2165}
2166
2167/// Try to parse a grid table starting at the given position.
2168/// Returns the number of lines consumed if successful.
2169pub(crate) fn try_parse_grid_table(
2170    window: &StrippedLines<'_, '_>,
2171    builder: &mut GreenNodeBuilder<'static>,
2172    config: &ParserOptions,
2173) -> Option<usize> {
2174    let lines = window.raw();
2175    let start_pos = window.pos();
2176    if start_pos >= lines.len() {
2177        return None;
2178    }
2179
2180    // Grid-border detection reads the stripped view through `UniformStripView`,
2181    // which strips *every* line — including the dispatch line — with the full
2182    // container strip. The strict column-0 check in `try_parse_grid_separator`
2183    // would otherwise reject a `+---+` border sitting at column 0 of a list
2184    // item's inner content if the dispatch line kept its list-indent. With an
2185    // empty prefix the stripped view equals the raw lines. Emission still goes
2186    // through `window.emit_or_dispatch_tail`, which preserves the indent bytes.
2187    // Scans stop at the first blank line, so only a bounded range is stripped.
2188    let view = UniformStripView(window);
2189
2190    // Cheap gate: a grid table's first line is a grid separator (`+---+`/`+===+`),
2191    // unless this is a caption-led table. Table detection runs at every block
2192    // start, so any per-line work for every prose/math paragraph was quadratic
2193    // on large documents. Run the gate on the same `view` the detection uses, so
2194    // it applies inside containers (blockquote/list) too — not just at top level.
2195    if try_parse_grid_separator(view.line(start_pos)).is_none()
2196        && !is_caption_followed_by_table(&view, start_pos)
2197    {
2198        return None;
2199    }
2200
2201    // Check if this line is a caption followed by a table
2202    // If so, the actual table starts after the caption and blank line
2203    let (actual_start, caption_before) = if is_caption_followed_by_table(&view, start_pos) {
2204        let (cap_start, cap_end) = caption_range_starting_at(&view, start_pos)?;
2205        let mut pos = cap_end;
2206        while pos < view.line_count() && view.line(pos).trim().is_empty() {
2207            pos += 1;
2208        }
2209        (pos, Some((cap_start, cap_end)))
2210    } else {
2211        (start_pos, None)
2212    };
2213
2214    if actual_start >= lines.len() {
2215        return None;
2216    }
2217
2218    // First line must be a grid separator
2219    let first_line = view.line(actual_start);
2220    let _columns = try_parse_grid_separator(first_line)?;
2221
2222    // Track table structure
2223    let mut end_pos = actual_start + 1;
2224    let mut found_header_sep = false;
2225    let mut in_footer = false;
2226
2227    // Scan table lines
2228    while end_pos < lines.len() {
2229        let line = view.line(end_pos);
2230
2231        // Check for blank line (table ends)
2232        if line.trim().is_empty() {
2233            break;
2234        }
2235
2236        // Check for separator line
2237        if let Some(sep_cols) = try_parse_grid_separator(line) {
2238            // Check if this is a header separator (=)
2239            if sep_cols.iter().any(|c| c.is_header_separator) {
2240                if !found_header_sep {
2241                    found_header_sep = true;
2242                } else if !in_footer {
2243                    // Second = separator starts footer
2244                    in_footer = true;
2245                }
2246            }
2247            end_pos += 1;
2248            continue;
2249        }
2250
2251        // Check for content row
2252        if is_grid_content_row(line) {
2253            end_pos += 1;
2254            continue;
2255        }
2256
2257        // Not a valid grid table line - table ends
2258        break;
2259    }
2260
2261    // Must have consumed at least 3 lines (top separator, content, bottom separator)
2262    // Or just top + content rows that end with a separator
2263    if end_pos <= actual_start + 1 {
2264        return None;
2265    }
2266
2267    // Last consumed line should be a separator for a well-formed table
2268    // But we'll be lenient and accept tables ending with content rows
2269
2270    // Check for caption before table (only if we didn't already detected it)
2271    let caption_before = caption_before.or_else(|| find_caption_before_table(&view, actual_start));
2272
2273    // Check for caption after table
2274    let caption_after = if caption_before.is_some() {
2275        None
2276    } else {
2277        find_caption_after_table(&view, end_pos)
2278    };
2279
2280    // Build the grid table
2281    builder.start_node(SyntaxKind::GRID_TABLE.into());
2282
2283    // Emit caption before if present
2284    if let Some((cap_start, cap_end)) = caption_before {
2285        emit_table_caption(builder, window, cap_start, cap_end, config);
2286        // Emit blank line between caption and table if present
2287        emit_caption_blank_lines(builder, window, cap_end, actual_start);
2288    }
2289
2290    // Track whether we've passed the header separator
2291    let mut past_header_sep = false;
2292    let mut in_footer_section = false;
2293    // Accumulate ABSOLUTE indices of the lines making up a multi-line row, so
2294    // each line's container prefix can be re-emitted via the window.
2295    let mut current_row_indices: Vec<usize> = Vec::new();
2296    let mut current_row_kind = SyntaxKind::TABLE_HEADER;
2297
2298    // Emit table rows - accumulate multi-line cells
2299    for idx in actual_start..end_pos {
2300        let line = view.line(idx);
2301        if let Some(sep_cols) = try_parse_grid_separator(line) {
2302            // Separator line - emit any accumulated row first
2303            if !current_row_indices.is_empty() {
2304                emit_grid_table_row(
2305                    builder,
2306                    window,
2307                    &current_row_indices,
2308                    &sep_cols,
2309                    current_row_kind,
2310                    config,
2311                );
2312                current_row_indices.clear();
2313            }
2314
2315            let is_header_sep = sep_cols.iter().any(|c| c.is_header_separator);
2316
2317            // Re-emit any continuation-line container prefix (`  > `) as
2318            // WHITESPACE/BLOCK_QUOTE_MARKER tokens before the separator text.
2319            if is_header_sep {
2320                if !past_header_sep {
2321                    // This is the header/body separator
2322                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2323                    let tail = window.emit_or_dispatch_tail(builder, idx);
2324                    emit_separator_tokens(builder, tail);
2325                    builder.finish_node();
2326                    past_header_sep = true;
2327                } else {
2328                    // Footer separator
2329                    if !in_footer_section {
2330                        in_footer_section = true;
2331                    }
2332                    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2333                    let tail = window.emit_or_dispatch_tail(builder, idx);
2334                    emit_separator_tokens(builder, tail);
2335                    builder.finish_node();
2336                }
2337            } else {
2338                // Regular separator (row boundary)
2339                builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
2340                let tail = window.emit_or_dispatch_tail(builder, idx);
2341                emit_separator_tokens(builder, tail);
2342                builder.finish_node();
2343            }
2344        } else if is_grid_content_row(line) {
2345            // Content row - accumulate for multi-line cells
2346            current_row_kind = if !past_header_sep && found_header_sep {
2347                SyntaxKind::TABLE_HEADER
2348            } else if in_footer_section {
2349                SyntaxKind::TABLE_FOOTER
2350            } else {
2351                SyntaxKind::TABLE_ROW
2352            };
2353
2354            current_row_indices.push(idx);
2355        }
2356    }
2357
2358    // Emit any remaining accumulated row
2359    if !current_row_indices.is_empty() {
2360        // Use first separator's columns for cell boundaries
2361        if let Some(sep_cols) = try_parse_grid_separator(view.line(actual_start)) {
2362            emit_grid_table_row(
2363                builder,
2364                window,
2365                &current_row_indices,
2366                &sep_cols,
2367                current_row_kind,
2368                config,
2369            );
2370        }
2371    }
2372
2373    // Emit caption after if present
2374    if let Some((cap_start, cap_end)) = caption_after {
2375        emit_caption_blank_lines(builder, window, end_pos, cap_start);
2376        emit_table_caption(builder, window, cap_start, cap_end, config);
2377    }
2378
2379    builder.finish_node(); // GRID_TABLE
2380
2381    // Calculate lines consumed
2382    let table_start = caption_before
2383        .map(|(start, _)| start)
2384        .unwrap_or(actual_start);
2385    let table_end = if let Some((_, cap_end)) = caption_after {
2386        cap_end
2387    } else {
2388        end_pos
2389    };
2390
2391    Some(table_end - table_start)
2392}
2393
2394#[cfg(test)]
2395mod grid_table_tests {
2396    use super::super::container_prefix::ContainerPrefix;
2397    use super::*;
2398
2399    #[test]
2400    fn test_grid_separator_detection() {
2401        assert!(try_parse_grid_separator("+---+---+").is_some());
2402        assert!(try_parse_grid_separator("+===+===+").is_some());
2403        assert!(try_parse_grid_separator("+---------------+---------------+").is_some());
2404        assert!(try_parse_grid_separator("+:---:+").is_some()); // center aligned
2405        assert!(try_parse_grid_separator("not a separator").is_none());
2406        assert!(try_parse_grid_separator("|---|---|").is_none()); // pipe table sep
2407
2408        // A grid border must sit at column 0 of its container content; an
2409        // indented border is not a grid table (matches pandoc, which parses
2410        // an indented `+---+` as a paragraph). Detection runs on the
2411        // container-stripped line, so any remaining leading space disqualifies.
2412        assert!(try_parse_grid_separator(" +---+---+").is_none());
2413        assert!(try_parse_grid_separator("  +---+---+").is_none());
2414        assert!(try_parse_grid_separator("   +===+===+").is_none());
2415    }
2416
2417    #[test]
2418    fn test_grid_header_separator() {
2419        let cols = try_parse_grid_separator("+===+===+").unwrap();
2420        assert!(cols.iter().all(|c| c.is_header_separator));
2421
2422        let cols2 = try_parse_grid_separator("+---+---+").unwrap();
2423        assert!(cols2.iter().all(|c| !c.is_header_separator));
2424    }
2425
2426    #[test]
2427    fn test_grid_content_row_detection() {
2428        assert!(is_grid_content_row("| content | content |"));
2429        assert!(is_grid_content_row("|  |  |"));
2430        assert!(is_grid_content_row("| content +------+"));
2431        assert!(!is_grid_content_row("+---+---+")); // separator, not content
2432        assert!(!is_grid_content_row("no pipes here"));
2433    }
2434
2435    #[test]
2436    fn test_basic_grid_table() {
2437        let input = vec![
2438            "+-------+-------+",
2439            "| Col1  | Col2  |",
2440            "+=======+=======+",
2441            "| A     | B     |",
2442            "+-------+-------+",
2443            "",
2444        ];
2445
2446        let mut builder = GreenNodeBuilder::new();
2447        let prefix = ContainerPrefix::default();
2448        let window = StrippedLines::new(&input, 0, &prefix);
2449        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2450
2451        assert!(result.is_some());
2452        assert_eq!(result.unwrap(), 5);
2453    }
2454
2455    #[test]
2456    fn test_grid_table_multirow() {
2457        let input = vec![
2458            "+---------------+---------------+",
2459            "| Fruit         | Advantages    |",
2460            "+===============+===============+",
2461            "| Bananas       | - wrapper     |",
2462            "|               | - color       |",
2463            "+---------------+---------------+",
2464            "| Oranges       | - scurvy      |",
2465            "|               | - tasty       |",
2466            "+---------------+---------------+",
2467            "",
2468        ];
2469
2470        let mut builder = GreenNodeBuilder::new();
2471        let prefix = ContainerPrefix::default();
2472        let window = StrippedLines::new(&input, 0, &prefix);
2473        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2474
2475        assert!(result.is_some());
2476        assert_eq!(result.unwrap(), 9);
2477    }
2478
2479    #[test]
2480    fn test_grid_table_with_footer() {
2481        let input = vec![
2482            "+-------+-------+",
2483            "| Fruit | Price |",
2484            "+=======+=======+",
2485            "| Apple | $1.00 |",
2486            "+-------+-------+",
2487            "| Pear  | $1.50 |",
2488            "+=======+=======+",
2489            "| Total | $2.50 |",
2490            "+=======+=======+",
2491            "",
2492        ];
2493
2494        let mut builder = GreenNodeBuilder::new();
2495        let prefix = ContainerPrefix::default();
2496        let window = StrippedLines::new(&input, 0, &prefix);
2497        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2498
2499        assert!(result.is_some());
2500        assert_eq!(result.unwrap(), 9);
2501    }
2502
2503    #[test]
2504    fn test_grid_table_headerless() {
2505        let input = vec![
2506            "+-------+-------+",
2507            "| A     | B     |",
2508            "+-------+-------+",
2509            "| C     | D     |",
2510            "+-------+-------+",
2511            "",
2512        ];
2513
2514        let mut builder = GreenNodeBuilder::new();
2515        let prefix = ContainerPrefix::default();
2516        let window = StrippedLines::new(&input, 0, &prefix);
2517        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2518
2519        assert!(result.is_some());
2520        assert_eq!(result.unwrap(), 5);
2521    }
2522
2523    #[test]
2524    fn test_grid_table_with_caption_before() {
2525        let input = vec![
2526            ": Sample table",
2527            "",
2528            "+-------+-------+",
2529            "| A     | B     |",
2530            "+=======+=======+",
2531            "| C     | D     |",
2532            "+-------+-------+",
2533            "",
2534        ];
2535
2536        let mut builder = GreenNodeBuilder::new();
2537        let prefix = ContainerPrefix::default();
2538        let window = StrippedLines::new(&input, 2, &prefix);
2539        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2540
2541        assert!(result.is_some());
2542        // Should include caption + blank + table
2543        assert_eq!(result.unwrap(), 7);
2544    }
2545
2546    #[test]
2547    fn test_grid_table_with_caption_after() {
2548        let input = vec![
2549            "+-------+-------+",
2550            "| A     | B     |",
2551            "+=======+=======+",
2552            "| C     | D     |",
2553            "+-------+-------+",
2554            "",
2555            "Table: My grid table",
2556            "",
2557        ];
2558
2559        let mut builder = GreenNodeBuilder::new();
2560        let prefix = ContainerPrefix::default();
2561        let window = StrippedLines::new(&input, 0, &prefix);
2562        let result = try_parse_grid_table(&window, &mut builder, &ParserOptions::default());
2563
2564        assert!(result.is_some());
2565        // table + blank + caption
2566        assert_eq!(result.unwrap(), 7);
2567    }
2568}
2569
2570// ============================================================================
2571// Multiline Table Parsing
2572// ============================================================================
2573
2574/// Check if a line is a multiline table separator (continuous dashes).
2575/// Multiline table separators span the full width and are all dashes.
2576/// Returns Some(columns) if valid, None otherwise.
2577fn try_parse_multiline_separator(line: &str) -> Option<Vec<Column>> {
2578    let trimmed = line.trim_start();
2579    let leading_spaces = line.len() - trimmed.len();
2580
2581    // Must have leading spaces <= 3 to not be a code block
2582    if leading_spaces > 3 {
2583        return None;
2584    }
2585
2586    let trimmed = trimmed.trim_end();
2587
2588    // Must be all dashes (continuous line of dashes)
2589    if trimmed.is_empty() || !trimmed.chars().all(|c| c == '-') {
2590        return None;
2591    }
2592
2593    // Must have at least 3 dashes
2594    if trimmed.len() < 3 {
2595        return None;
2596    }
2597
2598    // This is a full-width separator - columns will be determined by column separator lines
2599    Some(vec![Column {
2600        start: leading_spaces,
2601        end: leading_spaces + trimmed.len(),
2602        alignment: Alignment::Default,
2603    }])
2604}
2605
2606/// Check if a line is a column separator line for multiline tables.
2607/// Column separators have dashes with spaces between them to define columns.
2608fn is_column_separator(line: &str) -> bool {
2609    try_parse_table_separator(line).is_some() && !line.contains('*') && !line.contains('_')
2610}
2611
2612fn is_headerless_single_row_without_blank(
2613    lines: &(impl LineView + ?Sized),
2614    row_start: usize,
2615    row_end: usize,
2616    columns: &[Column],
2617) -> bool {
2618    if row_start >= row_end {
2619        return false;
2620    }
2621
2622    if row_end - row_start == 1 {
2623        return false;
2624    }
2625
2626    let Some(last_col) = columns.last() else {
2627        return false;
2628    };
2629
2630    for i in (row_start + 1)..row_end {
2631        let (content, _) = strip_newline(lines.line(i));
2632        let prefix_end = last_col.start.min(content.len());
2633        if !content[..prefix_end].trim().is_empty() {
2634            return false;
2635        }
2636    }
2637
2638    true
2639}
2640
/// Try to parse a multiline table starting at the window's current position.
///
/// The table must open with either a full-width dash rule (see
/// `try_parse_multiline_separator`) or, for the headerless form, a column
/// separator (see `is_column_separator`). It must contain a column separator
/// and end with a closing separator of the same kind as the opener. A
/// headerless table without any blank line is only accepted when
/// `is_headerless_single_row_without_blank` holds for its rows.
///
/// On success a `MULTILINE_TABLE` node is emitted into `builder`, including a
/// caption found before the table or, failing that, after it. The return value
/// is the number of lines consumed, measured from the start of a preceding
/// caption (if any) to the end of a trailing caption (if any).
///
/// Every `None` return happens before anything is written to `builder`.
pub(crate) fn try_parse_multiline_table(
    window: &StrippedLines<'_, '_>,
    builder: &mut GreenNodeBuilder<'static>,
    config: &ParserOptions,
) -> Option<usize> {
    // `lines` is only used for its length (the scan's upper bound); line
    // contents are always read through the window.
    let lines = window.raw();
    let start_pos = window.pos();
    if start_pos >= lines.len() {
        return None;
    }

    // Cheap gate: a multiline table's first line is either a full-width dash
    // separator or a column separator. Table detection runs at every block
    // start, so any per-line work for every paragraph that can't begin a
    // multiline table was quadratic on large documents. Peek just the dispatch
    // line via `strip_at` and bail before any further scanning.
    let first_line = window.strip_at(start_pos);

    // First line can be either:
    // 1. A full-width dash separator (for tables with headers)
    // 2. A column separator (for headerless tables)
    let is_full_width_start = try_parse_multiline_separator(first_line).is_some();
    let is_column_sep_start = !is_full_width_start && is_column_separator(first_line);
    if !is_full_width_start && !is_column_sep_start {
        return None;
    }

    // Detection scans read the container-prefix-stripped view lazily through the
    // window (see `LineView`) so a multiline table nested in `list → blockquote`
    // (e.g. `- > ----`) has its `  > ` prefix removed before the
    // separator/blank-row shape checks. The interior `>`-only row then strips to
    // `""` and registers as a blank row separator. With an empty prefix the
    // stripped view equals the raw lines. Note that the scan below does NOT
    // stop at a blank line (blank lines separate rows); it runs until a closing
    // separator or the end of the buffer, and only the lines it visits are
    // stripped. Emission re-emits the prefix bytes as tokens via the window;
    // caption lookup goes through the same window.
    //
    // For the headerless form the opening line doubles as the column
    // separator, so its columns are parsed up front for the single-row guard.
    let headerless_columns = if is_column_sep_start {
        try_parse_table_separator(window.line(start_pos))
    } else {
        None
    };

    // Look ahead to find the structure
    let mut pos = start_pos + 1;
    let mut found_column_sep = is_column_sep_start; // Already found if headerless
    // The `0` placeholder is never read: it is overwritten when the column
    // separator is found, and the function bails out if none is found.
    let mut column_sep_pos = if is_column_sep_start { start_pos } else { 0 };
    let mut has_header = false;
    let mut found_blank_line = false;
    let mut found_closing_sep = false;
    let mut content_line_count = 0usize;

    // Scan for header section and column separator
    while pos < lines.len() {
        let line = window.line(pos);

        // Check for column separator (defines columns) - only if we started with full-width
        if is_full_width_start && is_column_separator(line) && !found_column_sep {
            found_column_sep = true;
            column_sep_pos = pos;
            has_header = pos > start_pos + 1; // Has header if there's content before column sep
            pos += 1;
            continue;
        }

        // Check for blank line (row separator in body)
        if line.trim().is_empty() {
            found_blank_line = true;
            pos += 1;
            // Check if next line is a valid closing separator for this table shape.
            if pos < lines.len() {
                let next = window.line(pos);
                let is_valid_closer = if is_full_width_start {
                    try_parse_multiline_separator(next).is_some()
                } else {
                    is_column_separator(next)
                };
                if is_valid_closer {
                    found_closing_sep = true;
                    pos += 1; // Include the closing separator
                    break;
                }
            }
            // Not a closer: the line after the blank is re-examined by the
            // next loop iteration.
            continue;
        }

        // Check for closing full-width dashes (only for full-width-start tables).
        if is_full_width_start && try_parse_multiline_separator(line).is_some() {
            found_closing_sep = true;
            pos += 1;
            break;
        }

        // Check for closing column separator (for headerless tables). At least
        // one content line must precede it.
        if is_column_sep_start && is_column_separator(line) && content_line_count > 0 {
            found_closing_sep = true;
            pos += 1;
            break;
        }

        // Content row
        content_line_count += 1;
        pos += 1;
    }

    // Must have found a column separator to be a valid multiline table
    if !found_column_sep {
        return None;
    }

    // A blank line between rows is one way to tell a multiline table from a
    // simple one, but not the only one. A full-width top border (the
    // `is_full_width_start` case) already distinguishes a multiline table from
    // a simple table, so pandoc accepts it even when every row is a single line
    // with no interior blanks; the required column separator and closing border
    // (checked above and below) keep a bare thematic break from matching. Only
    // the headerless, column-separator-started shape still needs the
    // single-row guard.
    if !found_blank_line && is_column_sep_start {
        let columns = headerless_columns.as_deref()?;
        // `pos - 1` is the closing separator when one was found, so the range
        // passed here is the rows between the opening and closing separators.
        if !is_headerless_single_row_without_blank(window, start_pos + 1, pos - 1, columns) {
            return None;
        }
    }

    // Must have a closing separator
    if !found_closing_sep {
        return None;
    }

    // Must have consumed more than just the opening separator
    if pos <= start_pos + 2 {
        return None;
    }

    // Exclusive end of the table body; the closing separator is `end_pos - 1`.
    let end_pos = pos;

    // Extract column boundaries from the separator line. This cannot fail:
    // the line at `column_sep_pos` already passed `is_column_separator`, which
    // requires `try_parse_table_separator` to succeed.
    let columns = try_parse_table_separator(window.line(column_sep_pos))
        .expect("Column separator must be valid");

    // Check for caption before table
    let caption_before = find_caption_before_table(window, start_pos);

    // Check for caption after table (only looked for when there is no caption
    // before the table)
    let caption_after = if caption_before.is_some() {
        None
    } else {
        find_caption_after_table(window, end_pos)
    };

    // Build the multiline table
    builder.start_node(SyntaxKind::MULTILINE_TABLE.into());

    // Emit caption before if present
    if let Some((cap_start, cap_end)) = caption_before {
        emit_table_caption(builder, window, cap_start, cap_end, config);
        // Emit blank line between caption and table if present
        emit_caption_blank_lines(builder, window, cap_end, start_pos);
    }

    // Emit opening separator. The dispatch line's prefix was already consumed
    // by core (`dispatch_tail`); a non-dispatch start (caption-before case)
    // re-emits its `  > ` prefix via `emit_prefix_at`.
    builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
    let tail = window.emit_or_dispatch_tail(builder, start_pos);
    emit_separator_tokens(builder, tail);
    builder.finish_node();

    // Track state for emitting. Accumulate ABSOLUTE indices of the lines making
    // up a multi-line row so each line's container prefix can be re-emitted via
    // the window.
    let mut in_header = has_header;
    let mut current_row_indices: Vec<usize> = Vec::new();

    for i in (start_pos + 1)..end_pos {
        let line = window.line(i);
        // Column separator (header/body divider)
        if i == column_sep_pos {
            // Emit any accumulated header lines
            if !current_row_indices.is_empty() {
                emit_multiline_table_row(
                    builder,
                    window,
                    &current_row_indices,
                    &columns,
                    SyntaxKind::TABLE_HEADER,
                    config,
                );
                current_row_indices.clear();
            }

            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
            let tail = window.emit_or_dispatch_tail(builder, i);
            emit_separator_tokens(builder, tail);
            builder.finish_node();
            // Everything after the column separator belongs to the body.
            in_header = false;
            continue;
        }

        // Closing separator (full-width or column separator at end).
        // NOTE(review): this test is shape-based, so it also matches an
        // interior line that merely looks like a separator, whereas the scan
        // above counts such a line as content once the column separator is
        // known. Confirm that emitting it as TABLE_SEPARATOR is intended.
        if try_parse_multiline_separator(line).is_some() || is_column_separator(line) {
            // Emit any accumulated row lines
            if !current_row_indices.is_empty() {
                let kind = if in_header {
                    SyntaxKind::TABLE_HEADER
                } else {
                    SyntaxKind::TABLE_ROW
                };
                emit_multiline_table_row(
                    builder,
                    window,
                    &current_row_indices,
                    &columns,
                    kind,
                    config,
                );
                current_row_indices.clear();
            }

            builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
            let tail = window.emit_or_dispatch_tail(builder, i);
            emit_separator_tokens(builder, tail);
            builder.finish_node();
            continue;
        }

        // Blank line (row separator)
        if line.trim().is_empty() {
            // Emit accumulated row
            if !current_row_indices.is_empty() {
                let kind = if in_header {
                    SyntaxKind::TABLE_HEADER
                } else {
                    SyntaxKind::TABLE_ROW
                };
                emit_multiline_table_row(
                    builder,
                    window,
                    &current_row_indices,
                    &columns,
                    kind,
                    config,
                );
                current_row_indices.clear();
            }

            // Re-emit the interior `>`-only separator row's container prefix
            // (`  > `) inside the BLANK_LINE node so it round-trips losslessly.
            builder.start_node(SyntaxKind::BLANK_LINE.into());
            let tail = window.emit_or_dispatch_tail(builder, i);
            builder.token(SyntaxKind::BLANK_LINE.into(), tail);
            builder.finish_node();
            continue;
        }

        // Content line - accumulate for current row
        current_row_indices.push(i);
    }

    // Emit any remaining accumulated lines
    if !current_row_indices.is_empty() {
        let kind = if in_header {
            SyntaxKind::TABLE_HEADER
        } else {
            SyntaxKind::TABLE_ROW
        };
        emit_multiline_table_row(
            builder,
            window,
            &current_row_indices,
            &columns,
            kind,
            config,
        );
    }

    // Emit caption after if present, preceded by the blank lines that separate
    // it from the table
    if let Some((cap_start, cap_end)) = caption_after {
        emit_caption_blank_lines(builder, window, end_pos, cap_start);
        emit_table_caption(builder, window, cap_start, cap_end, config);
    }

    builder.finish_node(); // MultilineTable

    // Calculate lines consumed: from the start of a caption-before (or the
    // table itself) to the end of a caption-after (or the table itself).
    let table_start = caption_before.map(|(start, _)| start).unwrap_or(start_pos);
    let table_end = if let Some((_, cap_end)) = caption_after {
        cap_end
    } else {
        end_pos
    };

    Some(table_end - table_start)
}
2936
2937/// Extract cell contents from first line only (for CST emission).
2938/// Multi-line content will be in continuation TEXT tokens.
2939fn extract_first_line_cell_contents(line: &str, columns: &[Column]) -> Vec<String> {
2940    let (line_content, _) = strip_newline(line);
2941    let mut cells = Vec::new();
2942
2943    for column in columns.iter() {
2944        let column_start = column_offset_to_byte_index(line_content, column.start);
2945        let column_end = column_offset_to_byte_index(line_content, column.end);
2946
2947        // Extract FULL text for this column (including whitespace)
2948        let cell_text = if column_start < column_end {
2949            &line_content[column_start..column_end]
2950        } else if column_start < line_content.len() {
2951            &line_content[column_start..]
2952        } else {
2953            ""
2954        };
2955
2956        cells.push(cell_text.to_string());
2957    }
2958
2959    cells
2960}
2961
2962/// Emit a multiline table row with inline parsing (Phase 7.1).
2963///
2964/// `indices` are ABSOLUTE line indices into the window's raw buffer; each
2965/// physical line re-emits its container prefix (`  > `) via the window before
2966/// its content. With an empty prefix the tails equal the raw lines, so emission
2967/// is byte-identical to the pre-window path.
2968fn emit_multiline_table_row(
2969    builder: &mut GreenNodeBuilder<'static>,
2970    window: &StrippedLines<'_, '_>,
2971    indices: &[usize],
2972    columns: &[Column],
2973    kind: SyntaxKind,
2974    config: &ParserOptions,
2975) {
2976    if indices.is_empty() {
2977        return;
2978    }
2979
2980    builder.start_node(kind.into());
2981
2982    // Emit the first line's container prefix as tokens, then slice cells from
2983    // the prefix-stripped tail (for CST losslessness, only the first physical
2984    // line is parsed into cells; continuation lines stay verbatim TEXT).
2985    let first_line = window.emit_or_dispatch_tail(builder, indices[0]);
2986    let cell_contents = extract_first_line_cell_contents(first_line, columns);
2987    let (trimmed, newline_str) = strip_newline(first_line);
2988    let mut current_pos = 0;
2989
2990    for (col_idx, column) in columns.iter().enumerate() {
2991        let cell_text = &cell_contents[col_idx];
2992        let cell_start = column_offset_to_byte_index(trimmed, column.start);
2993        let cell_end = column_offset_to_byte_index(trimmed, column.end);
2994
2995        // Emit whitespace before cell
2996        if current_pos < cell_start {
2997            builder.token(
2998                SyntaxKind::WHITESPACE.into(),
2999                &trimmed[current_pos..cell_start],
3000            );
3001        }
3002
3003        // Emit cell with inline parsing (first line content only)
3004        emit_table_cell(builder, cell_text, config);
3005
3006        current_pos = cell_end;
3007    }
3008
3009    // Emit trailing whitespace
3010    if current_pos < trimmed.len() {
3011        builder.token(SyntaxKind::WHITESPACE.into(), &trimmed[current_pos..]);
3012    }
3013
3014    // Emit newline
3015    if !newline_str.is_empty() {
3016        builder.token(SyntaxKind::NEWLINE.into(), newline_str);
3017    }
3018
3019    // Emit continuation lines as TEXT to preserve exact line structure,
3020    // re-emitting each line's container prefix first.
3021    for &idx in &indices[1..] {
3022        let tail = window.emit_or_dispatch_tail(builder, idx);
3023        emit_line_tokens(builder, tail);
3024    }
3025
3026    builder.finish_node();
3027}
3028
3029#[cfg(test)]
3030mod multiline_table_tests {
3031    use super::super::container_prefix::ContainerPrefix;
3032    use super::*;
3033    use crate::syntax::SyntaxNode;
3034
3035    #[test]
3036    fn test_multiline_separator_detection() {
3037        assert!(
3038            try_parse_multiline_separator(
3039                "-------------------------------------------------------------"
3040            )
3041            .is_some()
3042        );
3043        assert!(try_parse_multiline_separator("---").is_some());
3044        assert!(try_parse_multiline_separator("  -----").is_some()); // with leading spaces
3045        assert!(try_parse_multiline_separator("--").is_none()); // too short
3046        assert!(try_parse_multiline_separator("--- ---").is_none()); // has spaces
3047        assert!(try_parse_multiline_separator("+---+").is_none()); // grid separator
3048    }
3049
3050    #[test]
3051    fn test_basic_multiline_table() {
3052        let input = vec![
3053            "-------------------------------------------------------------",
3054            " Centered   Default           Right Left",
3055            "  Header    Aligned         Aligned Aligned",
3056            "----------- ------- --------------- -------------------------",
3057            "   First    row                12.0 Example of a row that",
3058            "                                    spans multiple lines.",
3059            "",
3060            "  Second    row                 5.0 Here's another one.",
3061            "-------------------------------------------------------------",
3062            "",
3063        ];
3064
3065        let mut builder = GreenNodeBuilder::new();
3066        let prefix = ContainerPrefix::default();
3067        let window = StrippedLines::new(&input, 0, &prefix);
3068        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3069
3070        assert!(result.is_some());
3071        assert_eq!(result.unwrap(), 9);
3072    }
3073
3074    #[test]
3075    fn test_multiline_table_headerless() {
3076        let input = vec![
3077            "----------- ------- --------------- -------------------------",
3078            "   First    row                12.0 Example of a row that",
3079            "                                    spans multiple lines.",
3080            "",
3081            "  Second    row                 5.0 Here's another one.",
3082            "----------- ------- --------------- -------------------------",
3083            "",
3084        ];
3085
3086        let mut builder = GreenNodeBuilder::new();
3087        let prefix = ContainerPrefix::default();
3088        let window = StrippedLines::new(&input, 0, &prefix);
3089        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3090
3091        assert!(result.is_some());
3092        assert_eq!(result.unwrap(), 6);
3093    }
3094
3095    #[test]
3096    fn test_multiline_table_headerless_single_line_is_not_multiline() {
3097        let input = vec![
3098            "-------     ------ ----------   -------",
3099            "     12     12        12             12",
3100            "-------     ------ ----------   -------",
3101            "",
3102            "Not part of table.",
3103            "",
3104        ];
3105
3106        let mut builder = GreenNodeBuilder::new();
3107        let prefix = ContainerPrefix::default();
3108        let window = StrippedLines::new(&input, 0, &prefix);
3109        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3110
3111        assert!(result.is_none());
3112    }
3113
3114    #[test]
3115    fn test_multiline_table_headerless_single_row_continuation_without_blank_line() {
3116        let input = vec![
3117            "----------  ---------  -----------  ---------------------------",
3118            "   First    row               12.0  Example of a row that spans",
3119            "                                    multiple lines.",
3120            "----------  ---------  -----------  ---------------------------",
3121            "",
3122        ];
3123
3124        let mut builder = GreenNodeBuilder::new();
3125        let prefix = ContainerPrefix::default();
3126        let window = StrippedLines::new(&input, 0, &prefix);
3127        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3128
3129        assert!(result.is_some());
3130        assert_eq!(result.unwrap(), 4);
3131    }
3132
3133    #[test]
3134    fn test_multiline_table_with_caption() {
3135        let input = vec![
3136            "-------------------------------------------------------------",
3137            " Col1       Col2",
3138            "----------- -------",
3139            "   A        B",
3140            "",
3141            "-------------------------------------------------------------",
3142            "",
3143            "Table: Here's the caption.",
3144            "",
3145        ];
3146
3147        let mut builder = GreenNodeBuilder::new();
3148        let prefix = ContainerPrefix::default();
3149        let window = StrippedLines::new(&input, 0, &prefix);
3150        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3151
3152        assert!(result.is_some());
3153        // table (6 lines) + blank + caption
3154        assert_eq!(result.unwrap(), 8);
3155    }
3156
3157    #[test]
3158    fn test_multiline_table_single_row() {
3159        let input = vec![
3160            "---------------------------------------------",
3161            " Header1    Header2",
3162            "----------- -----------",
3163            "   Data     More data",
3164            "",
3165            "---------------------------------------------",
3166            "",
3167        ];
3168
3169        let mut builder = GreenNodeBuilder::new();
3170        let prefix = ContainerPrefix::default();
3171        let window = StrippedLines::new(&input, 0, &prefix);
3172        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3173
3174        assert!(result.is_some());
3175        assert_eq!(result.unwrap(), 6);
3176    }
3177
3178    #[test]
3179    fn test_headerless_multiline_table_does_not_close_on_full_width_rule() {
3180        let input = vec![
3181            "- - - - -",
3182            "Third section with underscores.",
3183            "",
3184            "_____",
3185            "",
3186            "> Quote before rule",
3187            ">",
3188            "> ***",
3189            ">",
3190            "> Quote after rule",
3191            "",
3192            "Final paragraph.",
3193            "",
3194            "Here's a horizontal rule:",
3195            "",
3196            "---",
3197            "Text directly after the horizontal rule.",
3198            "",
3199        ];
3200
3201        let mut builder = GreenNodeBuilder::new();
3202        let prefix = ContainerPrefix::default();
3203        let window = StrippedLines::new(&input, 0, &prefix);
3204        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3205
3206        assert!(result.is_none());
3207    }
3208
3209    #[test]
3210    fn test_not_multiline_table() {
3211        // Simple table should not be parsed as multiline
3212        let input = vec![
3213            "  Right     Left     Center     Default",
3214            "-------     ------ ----------   -------",
3215            "     12     12        12            12",
3216            "",
3217        ];
3218
3219        let mut builder = GreenNodeBuilder::new();
3220        let prefix = ContainerPrefix::default();
3221        let window = StrippedLines::new(&input, 0, &prefix);
3222        let result = try_parse_multiline_table(&window, &mut builder, &ParserOptions::default());
3223
3224        // Should not parse because first line isn't a full-width separator
3225        assert!(result.is_none());
3226    }
3227
3228    // Phase 7.1: Unit tests for emit_table_cell() helper
3229    #[test]
3230    fn test_emit_table_cell_plain_text() {
3231        let mut builder = GreenNodeBuilder::new();
3232        emit_table_cell(&mut builder, "Cell", &ParserOptions::default());
3233        let green = builder.finish();
3234        let node = SyntaxNode::new_root(green);
3235
3236        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3237        assert_eq!(node.text(), "Cell");
3238
3239        // Should have TEXT child
3240        let children: Vec<_> = node.children_with_tokens().collect();
3241        assert_eq!(children.len(), 1);
3242        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
3243    }
3244
3245    #[test]
3246    fn test_emit_table_cell_with_emphasis() {
3247        let mut builder = GreenNodeBuilder::new();
3248        emit_table_cell(&mut builder, "*italic*", &ParserOptions::default());
3249        let green = builder.finish();
3250        let node = SyntaxNode::new_root(green);
3251
3252        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3253        assert_eq!(node.text(), "*italic*");
3254
3255        // Should have EMPHASIS child
3256        let children: Vec<_> = node.children().collect();
3257        assert_eq!(children.len(), 1);
3258        assert_eq!(children[0].kind(), SyntaxKind::EMPHASIS);
3259    }
3260
3261    #[test]
3262    fn test_emit_table_cell_with_code() {
3263        let mut builder = GreenNodeBuilder::new();
3264        emit_table_cell(&mut builder, "`code`", &ParserOptions::default());
3265        let green = builder.finish();
3266        let node = SyntaxNode::new_root(green);
3267
3268        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3269        assert_eq!(node.text(), "`code`");
3270
3271        // Should have CODE_SPAN child
3272        let children: Vec<_> = node.children().collect();
3273        assert_eq!(children.len(), 1);
3274        assert_eq!(children[0].kind(), SyntaxKind::INLINE_CODE);
3275    }
3276
3277    #[test]
3278    fn test_emit_table_cell_with_link() {
3279        let mut builder = GreenNodeBuilder::new();
3280        emit_table_cell(&mut builder, "[text](url)", &ParserOptions::default());
3281        let green = builder.finish();
3282        let node = SyntaxNode::new_root(green);
3283
3284        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3285        assert_eq!(node.text(), "[text](url)");
3286
3287        // Should have LINK child
3288        let children: Vec<_> = node.children().collect();
3289        assert_eq!(children.len(), 1);
3290        assert_eq!(children[0].kind(), SyntaxKind::LINK);
3291    }
3292
3293    #[test]
3294    fn test_emit_table_cell_with_strong() {
3295        let mut builder = GreenNodeBuilder::new();
3296        emit_table_cell(&mut builder, "**bold**", &ParserOptions::default());
3297        let green = builder.finish();
3298        let node = SyntaxNode::new_root(green);
3299
3300        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3301        assert_eq!(node.text(), "**bold**");
3302
3303        // Should have STRONG child
3304        let children: Vec<_> = node.children().collect();
3305        assert_eq!(children.len(), 1);
3306        assert_eq!(children[0].kind(), SyntaxKind::STRONG);
3307    }
3308
3309    #[test]
3310    fn test_emit_table_cell_mixed_inline() {
3311        let mut builder = GreenNodeBuilder::new();
3312        emit_table_cell(
3313            &mut builder,
3314            "Text **bold** and `code`",
3315            &ParserOptions::default(),
3316        );
3317        let green = builder.finish();
3318        let node = SyntaxNode::new_root(green);
3319
3320        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3321        assert_eq!(node.text(), "Text **bold** and `code`");
3322
3323        // Should have multiple children: TEXT, STRONG, TEXT, CODE_SPAN
3324        let children: Vec<_> = node.children_with_tokens().collect();
3325        assert!(children.len() >= 4);
3326
3327        // Check some expected types
3328        assert_eq!(children[0].kind(), SyntaxKind::TEXT);
3329        assert_eq!(children[1].kind(), SyntaxKind::STRONG);
3330    }
3331
3332    #[test]
3333    fn test_emit_table_cell_empty() {
3334        let mut builder = GreenNodeBuilder::new();
3335        emit_table_cell(&mut builder, "", &ParserOptions::default());
3336        let green = builder.finish();
3337        let node = SyntaxNode::new_root(green);
3338
3339        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3340        assert_eq!(node.text(), "");
3341
3342        // Empty cell should have no children
3343        let children: Vec<_> = node.children_with_tokens().collect();
3344        assert_eq!(children.len(), 0);
3345    }
3346
3347    #[test]
3348    fn test_emit_table_cell_escaped_pipe() {
3349        let mut builder = GreenNodeBuilder::new();
3350        emit_table_cell(&mut builder, r"A \| B", &ParserOptions::default());
3351        let green = builder.finish();
3352        let node = SyntaxNode::new_root(green);
3353
3354        assert_eq!(node.kind(), SyntaxKind::TABLE_CELL);
3355        // The escaped pipe should be preserved
3356        assert_eq!(node.text(), r"A \| B");
3357    }
3358}