panache_parser/parser/blocks/
lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{
6    Container, ContainerStack, leading_indent, leading_indent_from,
7};
8use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
9use crate::parser::utils::list_item_buffer::ListItemBuffer;
10
11#[derive(Debug, Clone, PartialEq)]
12pub(crate) enum ListMarker {
13    Bullet(char),
14    Ordered(OrderedMarker),
15}
16
17#[derive(Debug, Clone, PartialEq)]
18pub(crate) enum OrderedMarker {
19    Decimal {
20        number: String,
21        style: ListDelimiter,
22    },
23    Hash,
24    LowerAlpha {
25        letter: char,
26        style: ListDelimiter,
27    },
28    UpperAlpha {
29        letter: char,
30        style: ListDelimiter,
31    },
32    LowerRoman {
33        numeral: String,
34        style: ListDelimiter,
35    },
36    UpperRoman {
37        numeral: String,
38        style: ListDelimiter,
39    },
40    Example {
41        label: Option<String>,
42    },
43}
44
/// How an ordered list number is delimited.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosed in parentheses, as in `(1)`.
    Parens,
}
51
52#[derive(Debug, Clone, PartialEq)]
53pub(crate) struct ListMarkerMatch {
54    pub(crate) marker: ListMarker,
55    pub(crate) marker_len: usize,
56    pub(crate) spaces_after_cols: usize,
57    pub(crate) spaces_after_bytes: usize,
58    /// True when CommonMark's "≥ 5 cols of post-marker whitespace → marker + 1
59    /// virtual space; rest belongs to content" rule fired during marker
60    /// detection. The marker's required 1 col of trailing space was virtually
61    /// absorbed (typically from a tab) rather than consumed as a literal byte;
62    /// the surplus whitespace is left in the post-marker text so block-level
63    /// detection can recognize it as an indented code block.
64    pub(crate) virtual_marker_space: bool,
65}
66
67#[derive(Debug, Clone, Copy)]
68pub(in crate::parser) struct ListItemEmissionInput<'a> {
69    pub content: &'a str,
70    pub marker_len: usize,
71    pub spaces_after_cols: usize,
72    pub spaces_after_bytes: usize,
73    pub indent_cols: usize,
74    pub indent_bytes: usize,
75    pub virtual_marker_space: bool,
76}
77
/// Recognize a Roman numeral at the very start of `text`.
///
/// Only letters of the requested case are considered: `IVXLCDM` when
/// `uppercase` is true, `ivxlcdm` otherwise. On success the result is the
/// byte length of the numeral prefix; `None` means there is no acceptable
/// numeral there.
///
/// This stays byte-level and allocation-free on purpose: list-marker
/// detection calls it on every line, and an earlier version that built
/// upper-cased `String`s and `Vec<char>`s was a profiling hotspot. Roman
/// symbols are all ASCII, so they are case-folded per byte, and callers only
/// copy the numeral out of the input once the trailing `.` / `)` has been
/// confirmed too.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    /// Numeric value of an upper-case Roman symbol (0 for anything else).
    fn value_of(symbol: u8) -> u32 {
        match symbol {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    // Longest prefix made only of Roman symbols in the requested case.
    let alphabet: &[u8] = if uppercase { b"IVXLCDM" } else { b"ivxlcdm" };
    let len = text.bytes().take_while(|b| alphabet.contains(b)).count();
    let numeral = &text.as_bytes()[..len];

    match numeral {
        [] => return None,
        // A lone symbol is only accepted for the common I / V / X, so that
        // other letters stay available as alphabetic list markers.
        [only] if !matches!(only.to_ascii_uppercase(), b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // I, X and C may repeat at most three times in a row; V, L and D may not
    // repeat at all. M is unrestricted.
    for run in numeral.chunk_by(|a, b| a == b) {
        let max_repeat = match run[0].to_ascii_uppercase() {
            b'I' | b'X' | b'C' => 3,
            b'V' | b'L' | b'D' => 1,
            _ => usize::MAX,
        };
        if run.len() > max_repeat {
            return None;
        }
    }

    // A smaller symbol directly before a larger one must be one of the six
    // standard subtractive pairs.
    for pair in numeral.windows(2) {
        let small = pair[0].to_ascii_uppercase();
        let large = pair[1].to_ascii_uppercase();
        let subtractive = value_of(small) < value_of(large);
        let allowed = matches!(
            (small, large),
            (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
        );
        if subtractive && !allowed {
            return None;
        }
    }

    Some(len)
}
168
169/// Compute (spaces_after_cols, spaces_after_bytes, virtual_marker_space) for a
170/// post-marker string starting at column `marker_end_col` of the source line.
171///
172/// Implements CommonMark §5.2 rule #2: when the effective column-width of the
173/// post-marker whitespace (counted with tabs expanding from `marker_end_col`)
174/// is ≥ 5 and there is non-empty content after it, the list item's content
175/// column is `marker_end_col + 1` (the marker plus exactly one — possibly
176/// virtual — space). The surplus whitespace is left in the post-marker text
177/// so block-level dispatch can recognize it as an indented code block.
178///
179/// In the rule case, when the first byte is a tab whose source-column span
180/// exceeds 1, no bytes are consumed (the tab stays in content) and
181/// `virtual_marker_space` is true. Otherwise the byte count describes the
182/// literal whitespace consumed as marker space.
183fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
184    let (effective_cols, n_bytes) = leading_indent_from(after_marker, marker_end_col);
185    let after_ws = &after_marker[n_bytes..];
186    let has_content = !trim_end_newlines(after_ws).is_empty();
187    if has_content && effective_cols >= 5 {
188        let bytes = match after_marker.as_bytes().first() {
189            Some(b' ') => 1,
190            Some(b'\t') => {
191                let span = 4 - (marker_end_col % 4);
192                if span == 1 { 1 } else { 0 }
193            }
194            _ => 0,
195        };
196        (1, bytes, bytes == 0)
197    } else {
198        (effective_cols, n_bytes, false)
199    }
200}
201
202pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
203    // Trailing newlines should not block bare-marker detection; the line `*\n`
204    // is a bare bullet marker and the post-marker text is logically empty.
205    let line = trim_end_newlines(line);
206    let (_indent_cols, indent_bytes) = leading_indent(line);
207    let trimmed = &line[indent_bytes..];
208
209    // Try bullet markers (including task lists)
210    if let Some(ch) = trimmed.chars().next()
211        && matches!(ch, '*' | '+' | '-')
212    {
213        let after_marker = &trimmed[1..];
214
215        // Check for task list: [ ] or [x] or [X]
216        let trimmed_after = after_marker.trim_start();
217        let is_task = trimmed_after.starts_with('[')
218            && trimmed_after.len() >= 3
219            && matches!(
220                trimmed_after.chars().nth(1),
221                Some(' ') | Some('x') | Some('X')
222            )
223            && trimmed_after.chars().nth(2) == Some(']');
224
225        // Must be followed by whitespace (or be task list)
226        if after_marker.starts_with(' ')
227            || after_marker.starts_with('\t')
228            || after_marker.is_empty()
229            || is_task
230        {
231            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
232                marker_spaces_after(after_marker, _indent_cols + 1);
233            return Some(ListMarkerMatch {
234                marker: ListMarker::Bullet(ch),
235                marker_len: 1,
236                spaces_after_cols,
237                spaces_after_bytes,
238                virtual_marker_space,
239            });
240        }
241    }
242
243    // Try ordered markers
244    if config.extensions.fancy_lists
245        && let Some(after_marker) = trimmed.strip_prefix("#.")
246        && (after_marker.starts_with(' ')
247            || after_marker.starts_with('\t')
248            || after_marker.is_empty())
249    {
250        let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
251            marker_spaces_after(after_marker, _indent_cols + 2);
252        return Some(ListMarkerMatch {
253            marker: ListMarker::Ordered(OrderedMarker::Hash),
254            marker_len: 2,
255            spaces_after_cols,
256            spaces_after_bytes,
257            virtual_marker_space,
258        });
259    }
260
261    // Try example lists: (@) or (@label)
262    if config.extensions.example_lists
263        && let Some(rest) = trimmed.strip_prefix("(@")
264    {
265        // Check if it has a label or is just (@)
266        let label_end = rest
267            .chars()
268            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
269            .count();
270
271        // Must be followed by ')'
272        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
273            let label = if label_end > 0 {
274                Some(rest[..label_end].to_string())
275            } else {
276                None
277            };
278
279            let after_marker = &rest[label_end + 1..];
280            if after_marker.starts_with(' ')
281                || after_marker.starts_with('\t')
282                || after_marker.is_empty()
283            {
284                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
285                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
286                    marker_spaces_after(after_marker, _indent_cols + marker_len);
287                return Some(ListMarkerMatch {
288                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
289                    marker_len,
290                    spaces_after_cols,
291                    spaces_after_bytes,
292                    virtual_marker_space,
293                });
294            }
295        }
296    }
297
298    // Try parenthesized markers: (2), (a), (ii)
299    if let Some(rest) = trimmed.strip_prefix('(') {
300        if config.extensions.fancy_lists {
301            // Try decimal: (2)
302            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
303            if digit_count > 0
304                && rest.len() > digit_count
305                && rest.chars().nth(digit_count) == Some(')')
306            {
307                let number = &rest[..digit_count];
308                let after_marker = &rest[digit_count + 1..];
309                if after_marker.starts_with(' ')
310                    || after_marker.starts_with('\t')
311                    || after_marker.is_empty()
312                {
313                    let marker_len = 2 + digit_count;
314                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
315                        marker_spaces_after(after_marker, _indent_cols + marker_len);
316                    return Some(ListMarkerMatch {
317                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
318                            number: number.to_string(),
319                            style: ListDelimiter::Parens,
320                        }),
321                        marker_len,
322                        spaces_after_cols,
323                        spaces_after_bytes,
324                        virtual_marker_space,
325                    });
326                }
327            }
328        }
329
330        // Try fancy lists if enabled (parenthesized markers)
331        if config.extensions.fancy_lists {
332            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
333
334            // Try lowercase Roman: (ii)
335            if let Some(len) = try_parse_roman_numeral(rest, false)
336                && rest.len() > len
337                && rest.as_bytes()[len] == b')'
338            {
339                let after_marker = &rest[len + 1..];
340                if after_marker.starts_with(' ')
341                    || after_marker.starts_with('\t')
342                    || after_marker.is_empty()
343                {
344                    let marker_len = len + 2;
345                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
346                        marker_spaces_after(after_marker, _indent_cols + marker_len);
347                    return Some(ListMarkerMatch {
348                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
349                            numeral: rest[..len].to_string(),
350                            style: ListDelimiter::Parens,
351                        }),
352                        marker_len,
353                        spaces_after_cols,
354                        spaces_after_bytes,
355                        virtual_marker_space,
356                    });
357                }
358            }
359
360            // Try uppercase Roman: (II)
361            if let Some(len) = try_parse_roman_numeral(rest, true)
362                && rest.len() > len
363                && rest.as_bytes()[len] == b')'
364            {
365                let after_marker = &rest[len + 1..];
366                if after_marker.starts_with(' ')
367                    || after_marker.starts_with('\t')
368                    || after_marker.is_empty()
369                {
370                    let marker_len = len + 2;
371                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
372                        marker_spaces_after(after_marker, _indent_cols + marker_len);
373                    return Some(ListMarkerMatch {
374                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
375                            numeral: rest[..len].to_string(),
376                            style: ListDelimiter::Parens,
377                        }),
378                        marker_len,
379                        spaces_after_cols,
380                        spaces_after_bytes,
381                        virtual_marker_space,
382                    });
383                }
384            }
385
386            // Try lowercase letter: (a)
387            if let Some(ch) = rest.chars().next()
388                && ch.is_ascii_lowercase()
389                && rest.len() > 1
390                && rest.chars().nth(1) == Some(')')
391            {
392                let after_marker = &rest[2..];
393                if after_marker.starts_with(' ')
394                    || after_marker.starts_with('\t')
395                    || after_marker.is_empty()
396                {
397                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
398                        marker_spaces_after(after_marker, _indent_cols + 3);
399                    return Some(ListMarkerMatch {
400                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
401                            letter: ch,
402                            style: ListDelimiter::Parens,
403                        }),
404                        marker_len: 3,
405                        spaces_after_cols,
406                        spaces_after_bytes,
407                        virtual_marker_space,
408                    });
409                }
410            }
411
412            // Try uppercase letter: (A)
413            if let Some(ch) = rest.chars().next()
414                && ch.is_ascii_uppercase()
415                && rest.len() > 1
416                && rest.chars().nth(1) == Some(')')
417            {
418                let after_marker = &rest[2..];
419                if after_marker.starts_with(' ')
420                    || after_marker.starts_with('\t')
421                    || after_marker.is_empty()
422                {
423                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
424                        marker_spaces_after(after_marker, _indent_cols + 3);
425                    return Some(ListMarkerMatch {
426                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
427                            letter: ch,
428                            style: ListDelimiter::Parens,
429                        }),
430                        marker_len: 3,
431                        spaces_after_cols,
432                        spaces_after_bytes,
433                        virtual_marker_space,
434                    });
435                }
436            }
437        }
438    }
439
440    // Try decimal numbers: 1. or 1)
441    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
442    if digit_count > 0 && trimmed.len() > digit_count {
443        // CommonMark restricts ordered list markers to 1-9 digits (spec §5.2).
444        // Pandoc-markdown accepts arbitrary digit counts.
445        if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
446            return None;
447        }
448
449        let number = &trimmed[..digit_count];
450        let delim = trimmed.chars().nth(digit_count);
451
452        let (style, marker_len) = match delim {
453            Some('.') => (ListDelimiter::Period, digit_count + 1),
454            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
455            _ => return None,
456        };
457        // CommonMark §5.2: decimal `1)` markers are part of the core grammar.
458        // Pandoc-markdown gates `)`-style ordered markers behind `fancy_lists`.
459        if style == ListDelimiter::RightParen
460            && !config.extensions.fancy_lists
461            && config.dialect != crate::Dialect::CommonMark
462        {
463            return None;
464        }
465
466        let after_marker = &trimmed[marker_len..];
467        if after_marker.starts_with(' ')
468            || after_marker.starts_with('\t')
469            || after_marker.is_empty()
470        {
471            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
472                marker_spaces_after(after_marker, _indent_cols + marker_len);
473            return Some(ListMarkerMatch {
474                marker: ListMarker::Ordered(OrderedMarker::Decimal {
475                    number: number.to_string(),
476                    style,
477                }),
478                marker_len,
479                spaces_after_cols,
480                spaces_after_bytes,
481                virtual_marker_space,
482            });
483        }
484    }
485
486    // Try fancy lists if enabled (non-parenthesized)
487    if config.extensions.fancy_lists {
488        // Try Roman numerals first, as they may overlap with letters
489
490        // Try lowercase Roman: i. or ii)
491        if let Some(len) = try_parse_roman_numeral(trimmed, false)
492            && trimmed.len() > len
493            && let delim = trimmed.as_bytes()[len]
494            && (delim == b'.' || delim == b')')
495        {
496            let style = if delim == b'.' {
497                ListDelimiter::Period
498            } else {
499                ListDelimiter::RightParen
500            };
501            let marker_len = len + 1;
502
503            let after_marker = &trimmed[marker_len..];
504            if after_marker.starts_with(' ')
505                || after_marker.starts_with('\t')
506                || after_marker.is_empty()
507            {
508                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
509                    marker_spaces_after(after_marker, _indent_cols + marker_len);
510                return Some(ListMarkerMatch {
511                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
512                        numeral: trimmed[..len].to_string(),
513                        style,
514                    }),
515                    marker_len,
516                    spaces_after_cols,
517                    spaces_after_bytes,
518                    virtual_marker_space,
519                });
520            }
521        }
522
523        // Try uppercase Roman: I. or II)
524        if let Some(len) = try_parse_roman_numeral(trimmed, true)
525            && trimmed.len() > len
526            && let delim = trimmed.as_bytes()[len]
527            && (delim == b'.' || delim == b')')
528        {
529            let style = if delim == b'.' {
530                ListDelimiter::Period
531            } else {
532                ListDelimiter::RightParen
533            };
534            let marker_len = len + 1;
535
536            let after_marker = &trimmed[marker_len..];
537            // Pandoc: single-character uppercase Roman (I, V, X, L, C, D, M)
538            // followed by `.` requires two spaces, to avoid confusion with
539            // initials like "I. M. Pei". Multi-character romans (II., XII.,
540            // …) and the right-paren form (I)) only need one space. See
541            // pandoc/src/Text/Pandoc/Readers/Markdown.hs `orderedListStart`.
542            let min_spaces = if delim == b'.' && len == 1 { 2 } else { 1 };
543            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
544
545            if (after_marker.starts_with(' ')
546                || after_marker.starts_with('\t')
547                || after_marker.is_empty())
548                && (after_marker.is_empty() || effective_cols >= min_spaces)
549            {
550                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
551                    marker_spaces_after(after_marker, _indent_cols + marker_len);
552                return Some(ListMarkerMatch {
553                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
554                        numeral: trimmed[..len].to_string(),
555                        style,
556                    }),
557                    marker_len,
558                    spaces_after_cols,
559                    spaces_after_bytes,
560                    virtual_marker_space,
561                });
562            }
563        }
564
565        // Try lowercase letter: a. or a)
566        if let Some(ch) = trimmed.chars().next()
567            && ch.is_ascii_lowercase()
568            && trimmed.len() > 1
569            && let Some(delim) = trimmed.chars().nth(1)
570            && (delim == '.' || delim == ')')
571        {
572            let style = if delim == '.' {
573                ListDelimiter::Period
574            } else {
575                ListDelimiter::RightParen
576            };
577            let marker_len = 2;
578
579            let after_marker = &trimmed[marker_len..];
580            if after_marker.starts_with(' ')
581                || after_marker.starts_with('\t')
582                || after_marker.is_empty()
583            {
584                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
585                    marker_spaces_after(after_marker, _indent_cols + marker_len);
586                return Some(ListMarkerMatch {
587                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
588                    marker_len,
589                    spaces_after_cols,
590                    spaces_after_bytes,
591                    virtual_marker_space,
592                });
593            }
594        }
595
596        // Try uppercase letter: A. or A)
597        if let Some(ch) = trimmed.chars().next()
598            && ch.is_ascii_uppercase()
599            && trimmed.len() > 1
600            && let Some(delim) = trimmed.chars().nth(1)
601            && (delim == '.' || delim == ')')
602        {
603            let style = if delim == '.' {
604                ListDelimiter::Period
605            } else {
606                ListDelimiter::RightParen
607            };
608            let marker_len = 2;
609
610            let after_marker = &trimmed[marker_len..];
611            // Special rule: uppercase letter with period needs 2 spaces minimum
612            let min_spaces = if delim == '.' { 2 } else { 1 };
613            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
614
615            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
616                && effective_cols >= min_spaces
617            {
618                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
619                    marker_spaces_after(after_marker, _indent_cols + marker_len);
620                return Some(ListMarkerMatch {
621                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
622                    marker_len,
623                    spaces_after_cols,
624                    spaces_after_bytes,
625                    virtual_marker_space,
626                });
627            }
628        }
629    }
630
631    None
632}
633
634pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
635    match (a, b) {
636        // CommonMark §5.3: bullet list markers `-`, `+`, `*` are *distinct*
637        // bullet types — switching from one to another starts a new list.
638        // Pandoc-markdown treats them as interchangeable: any bullet
639        // continues an open bullet list. Verified with pandoc against
640        // `- foo\n- bar\n+ baz\n` (#301).
641        (ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
642            crate::Dialect::CommonMark => ca == cb,
643            _ => true,
644        },
645        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
646            true
647        }
648        (
649            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
650            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
651        ) => s1 == s2,
652        (
653            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
654            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
655        ) => s1 == s2,
656        (
657            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
658            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
659        ) => s1 == s2,
660        (
661            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
662            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
663        ) => s1 == s2,
664        (
665            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
666            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
667        ) => s1 == s2,
668        (
669            ListMarker::Ordered(OrderedMarker::Example { .. }),
670            ListMarker::Ordered(OrderedMarker::Example { .. }),
671        ) => true, // All example list items match each other
672        _ => false,
673    }
674}
675
676/// Emit a list item node to the builder (marker and whitespace only).
677/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
678/// added to the list item buffer for later inline parsing.
679pub(in crate::parser) fn emit_list_item(
680    builder: &mut GreenNodeBuilder<'static>,
681    item: &ListItemEmissionInput<'_>,
682) -> (usize, String) {
683    builder.start_node(SyntaxKind::LIST_ITEM.into());
684
685    // Emit leading indentation for lossless parsing
686    if item.indent_bytes > 0 {
687        builder.token(
688            SyntaxKind::WHITESPACE.into(),
689            &item.content[..item.indent_bytes],
690        );
691    }
692
693    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
694    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
695
696    if item.spaces_after_bytes > 0 {
697        let space_start = item.indent_bytes + item.marker_len;
698        let space_end = space_start + item.spaces_after_bytes;
699        if space_end <= item.content.len() {
700            builder.token(
701                SyntaxKind::WHITESPACE.into(),
702                &item.content[space_start..space_end],
703            );
704        }
705    }
706
707    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
708    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
709
710    // Extract text content to be buffered (instead of emitting it directly).
711    // If the item starts with a task checkbox, emit it as a dedicated token so it
712    // doesn't get parsed as a link.
713    let text_to_buffer = if content_start < item.content.len() {
714        let rest = &item.content[content_start..];
715        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
716            && rest
717                .as_bytes()
718                .get(3)
719                .is_some_and(|b| (*b as char).is_whitespace())
720        {
721            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
722            rest[3..].to_string()
723        } else {
724            rest.to_string()
725        }
726    } else {
727        String::new()
728    };
729
730    (content_col, text_to_buffer)
731}
732
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// A bullet is recognized whether a space or a tab follows it.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` on, lowercase letters work with both `.` and `)`.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        let cases = [
            // Lowercase alpha, period
            ("a. item", "a. should parse"),
            ("b. item", "b. should parse"),
            ("c. item", "c. should parse"),
            // Lowercase alpha, right paren
            ("a) item", "a) should parse"),
            ("b) item", "b) should parse"),
        ];
        for (line, message) in cases {
            assert!(
                try_parse_list_marker(line, &config).is_some(),
                "{message}"
            );
        }
    }
}
775
/// Fancy ordered markers match on scheme and delimiter style, not on value.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::{Period, RightParen};

    let lower_alpha =
        |letter, style| ListMarker::Ordered(OrderedMarker::LowerAlpha { letter, style });
    let lower_roman_period = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: Period,
        })
    };

    // Same scheme and style: match, whatever the value.
    let a_period = lower_alpha('a', Period);
    assert!(
        markers_match(&a_period, &lower_alpha('b', Period), crate::Dialect::Pandoc),
        "a. and b. should match"
    );
    assert!(
        markers_match(
            &lower_roman_period("i"),
            &lower_roman_period("ii"),
            crate::Dialect::Pandoc
        ),
        "i. and ii. should match"
    );

    // Same scheme, different delimiter style: no match.
    assert!(
        !markers_match(
            &a_period,
            &lower_alpha('a', RightParen),
            crate::Dialect::Pandoc
        ),
        "a. and a) should not match"
    );
}
819
/// Bullet matching differs per dialect: Pandoc treats any two bullets as the
/// same list, CommonMark only when the bullet character is identical.
#[test]
fn markers_match_bullet_dialect_split() {
    use ListMarker::Bullet;

    // Pandoc: any bullet matches any bullet (same list).
    assert!(markers_match(
        &Bullet('-'),
        &Bullet('+'),
        crate::Dialect::Pandoc
    ));

    // CommonMark: bullets match only when the marker character is the same.
    let commonmark_cases = [('-', '-', true), ('-', '+', false), ('*', '-', false)];
    for (left, right, same_list) in commonmark_cases {
        assert_eq!(
            markers_match(&Bullet(left), &Bullet(right), crate::Dialect::CommonMark),
            same_list
        );
    }
}
846
/// With `fancy_lists` enabled, the lowercase roman numerals four through ten
/// followed by a period are all recognised as list markers.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    let cases = [
        ("iv. item", "iv. should parse"),
        ("v. item", "v. should parse"),
        ("vi. item", "vi. should parse"),
        ("vii. item", "vii. should parse"),
        ("viii. item", "viii. should parse"),
        ("ix. item", "ix. should parse"),
        ("x. item", "x. should parse"),
    ];
    for (input, message) in cases {
        assert!(
            try_parse_list_marker(input, &config).is_some(),
            "{message}"
        );
    }
}
882
/// Example-list markers (`(@)` and `(@label)`) parse when `example_lists` is
/// enabled and are rejected when the extension is switched off.
#[test]
fn detects_example_list_markers() {
    let mut enabled = ParserOptions::default();
    enabled.extensions.example_lists = true;

    let accepted = [
        // Unlabeled example.
        ("(@) item", "(@) should parse"),
        // Labeled examples.
        ("(@foo) item", "(@foo) should parse"),
        ("(@my_label) item", "(@my_label) should parse"),
        ("(@test-123) item", "(@test-123) should parse"),
    ];
    for (input, message) in accepted {
        assert!(
            try_parse_list_marker(input, &enabled).is_some(),
            "{message}"
        );
    }

    // Same marker with the extension disabled.
    let disabled = ParserOptions {
        extensions: crate::options::Extensions {
            example_lists: false,
            ..Default::default()
        },
        ..Default::default()
    };
    assert!(
        try_parse_list_marker("(@) item", &disabled).is_none(),
        "(@) should not parse when extension disabled"
    );
}
921
/// A deep ordered marker whose indent (7) sits between two open lists
/// (bases 8 and 6) joins the enclosing list at base 8 (stack index 0) rather
/// than the lower-indented list at base 6.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let list_at = |base_indent_cols: usize| Container::List {
        marker: roman("ii"),
        base_indent_cols,
        has_blank_between_items: false,
    };

    let mut containers = ContainerStack::new();
    containers.push(list_at(8));
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(list_at(6));

    // With deep ordered drift (indent 7), we should keep the enclosing level
    // (base indent 8), not re-associate to the nearest lower sibling level (6).
    let level = find_matching_list_level(&containers, &roman("iii"), 7, crate::Dialect::Pandoc);
    assert_eq!(level, Some(0));
}
964
/// When one of the open lists has exactly the marker's indent (6), that list
/// (stack index 1) is chosen even though another list (base 8) is also
/// within drift range.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    for base_indent_cols in [8, 6] {
        containers.push(Container::List {
            marker: roman("ii"),
            base_indent_cols,
            has_blank_between_items: false,
        });
    }

    let level = find_matching_list_level(&containers, &roman("iii"), 6, crate::Dialect::Pandoc);
    assert_eq!(level, Some(1));
}
999
/// A line such as `- *` parses to an outer LIST/LIST_ITEM that contains a
/// nested LIST whose single LIST_ITEM carries no PLAIN content. Checked for
/// each of the three bullet characters as the nested marker.
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();

    for nested_bullet in ['*', '+', '-'] {
        let desc = format!("- {nested_bullet}");
        let input = format!("{desc}\n");
        let tree = parse(input.as_str(), Some(config.clone()));

        // The returned tree is itself the DOCUMENT node.
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT -> LIST
        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node");
        };

        // LIST -> LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        // LIST_ITEM -> nested LIST (rather than PLAIN with TEXT)
        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            );
        };

        // nested LIST -> LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        // The nested item is empty: no PLAIN child at all.
        assert!(
            nested_item
                .children()
                .all(|n| n.kind() != SyntaxKind::PLAIN),
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
1057
1058// Helper functions for list management in Parser
1059
1060/// Check if we're in any list.
1061pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
1062    containers
1063        .stack
1064        .iter()
1065        .any(|c| matches!(c, Container::List { .. }))
1066}
1067
1068/// Check if we're in a list inside a blockquote.
1069pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
1070    let mut seen_blockquote = false;
1071    for c in &containers.stack {
1072        if matches!(c, Container::BlockQuote { .. }) {
1073            seen_blockquote = true;
1074        }
1075        if seen_blockquote && matches!(c, Container::List { .. }) {
1076            return true;
1077        }
1078    }
1079    false
1080}
1081
1082/// Find matching list level for a marker with the given indent.
1083pub(in crate::parser) fn find_matching_list_level(
1084    containers: &ContainerStack,
1085    marker: &ListMarker,
1086    indent_cols: usize,
1087    dialect: crate::Dialect,
1088) -> Option<usize> {
1089    // Search from deepest (last) to shallowest (first)
1090    // But for shallow items (0-3 indent), prefer matching at the closest base indent
1091    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)
1092
1093    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
1094    let mut best_above_match: Option<(usize, usize)> = None; // (index, delta = base - indent), ordered deep only
1095
1096    for (i, c) in containers.stack.iter().enumerate().rev() {
1097        // BlockQuote acts as a list-continuation barrier. A list outside a
1098        // BlockQuote can't be continued from inside the BlockQuote — opening
1099        // a BlockQuote starts a new container "world". Without this stop,
1100        // `- intro\n\n  > - 0:` matches the outer `-` list and closes the
1101        // freshly-opened BlockQuote (issue #292). Pandoc-native treats the
1102        // inner list as a child of the BlockQuote.
1103        if matches!(c, Container::BlockQuote { .. }) {
1104            break;
1105        }
1106        if let Container::List {
1107            marker: list_marker,
1108            base_indent_cols,
1109            ..
1110        } = c
1111            && markers_match(marker, list_marker, dialect)
1112        {
1113            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
1114                // Deep indentation:
1115                // - bullets stay directional to preserve nesting boundaries
1116                // - ordered markers allow small symmetric drift to keep
1117                //   marker-width-aligned lists (i./ii./iii.) at one level
1118                match (marker, list_marker) {
1119                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1120                        indent_cols.abs_diff(*base_indent_cols) <= 3
1121                    }
1122                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
1123                }
1124            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
1125                // One shallow, one deep:
1126                // - ordered markers still allow symmetric drift so aligned roman
1127                //   markers (e.g. 3/4/5 spaces for i./ii./iii.) stay at one level
1128                // - bullets remain directional to preserve nesting boundaries
1129                match (marker, list_marker) {
1130                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1131                        indent_cols.abs_diff(*base_indent_cols) <= 3
1132                    }
1133                    _ => false,
1134                }
1135            } else {
1136                // Both at shallow indentation (0-3)
1137                // Allow items within 3 spaces
1138                indent_cols.abs_diff(*base_indent_cols) <= 3
1139            };
1140
1141            if matches {
1142                let distance = indent_cols.abs_diff(*base_indent_cols);
1143                let base_leq_indent = *base_indent_cols <= indent_cols;
1144
1145                // For deep ordered lists, avoid "nearest below" re-association caused by
1146                // formatter alignment shifts (e.g. i./ii./iii. becoming 6/7/8-space indents).
1147                // Prefer matching the nearest enclosing level whose base indent is >= current.
1148                if is_deep_ordered
1149                    && matches!(
1150                        (marker, list_marker),
1151                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
1152                    )
1153                    && *base_indent_cols >= indent_cols
1154                {
1155                    let delta = *base_indent_cols - indent_cols;
1156                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
1157                        best_above_match = Some((i, delta));
1158                    }
1159                }
1160
1161                if let Some((_, best_dist, best_base_leq)) = best_match {
1162                    if distance < best_dist
1163                        || (distance == best_dist && base_leq_indent && !best_base_leq)
1164                    {
1165                        best_match = Some((i, distance, base_leq_indent));
1166                    }
1167                } else {
1168                    best_match = Some((i, distance, base_leq_indent));
1169                }
1170
1171                // If we found an exact match, return immediately
1172                if distance == 0 {
1173                    return Some(i);
1174                }
1175            }
1176        }
1177    }
1178
1179    if let Some((index, _)) = best_above_match {
1180        return Some(index);
1181    }
1182
1183    best_match.map(|(i, _, _)| i)
1184}
1185
1186/// Start a nested list within an existing list item.
1187pub(in crate::parser) fn start_nested_list(
1188    containers: &mut ContainerStack,
1189    builder: &mut GreenNodeBuilder<'static>,
1190    marker: &ListMarker,
1191    item: &ListItemEmissionInput<'_>,
1192    indent_to_emit: Option<&str>,
1193    config: &ParserOptions,
1194) {
1195    // Emit the indent if needed
1196    if let Some(indent_str) = indent_to_emit {
1197        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1198    }
1199
1200    // Start nested list
1201    builder.start_node(SyntaxKind::LIST.into());
1202    containers.push(Container::List {
1203        marker: marker.clone(),
1204        base_indent_cols: item.indent_cols,
1205        has_blank_between_items: false,
1206    });
1207
1208    // Add the nested list item
1209    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1210    finish_list_item_with_optional_nested(
1211        containers,
1212        builder,
1213        content_col,
1214        text_to_buffer,
1215        item.virtual_marker_space,
1216        config,
1217    );
1218}
1219
1220/// Checks if the content after a list marker is exactly another bullet marker.
1221/// Returns the nested bullet marker character if detected.
1222pub(in crate::parser) fn is_content_nested_bullet_marker(
1223    content: &str,
1224    marker_len: usize,
1225    spaces_after_bytes: usize,
1226) -> Option<char> {
1227    let (_, indent_bytes) = leading_indent(content);
1228    let content_start = indent_bytes + marker_len + spaces_after_bytes;
1229
1230    if content_start >= content.len() {
1231        return None;
1232    }
1233
1234    let remaining = &content[content_start..];
1235    let (text_part, _) = strip_newline(remaining);
1236    let trimmed = text_part.trim();
1237
1238    // Check if it's exactly one of the bullet marker characters
1239    if trimmed.len() == 1 {
1240        let ch = trimmed.chars().next().unwrap();
1241        if matches!(ch, '*' | '+' | '-') {
1242            return Some(ch);
1243        }
1244    }
1245
1246    None
1247}
1248
1249/// Add a list item that contains a nested empty list (for cases like `- *`).
1250/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
1251pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1252    containers: &mut ContainerStack,
1253    builder: &mut GreenNodeBuilder<'static>,
1254    item: &ListItemEmissionInput<'_>,
1255    nested_marker: char,
1256) {
1257    // First, emit the outer list item (just marker + whitespace)
1258    builder.start_node(SyntaxKind::LIST_ITEM.into());
1259
1260    // Emit leading indentation for lossless parsing
1261    if item.indent_bytes > 0 {
1262        builder.token(
1263            SyntaxKind::WHITESPACE.into(),
1264            &item.content[..item.indent_bytes],
1265        );
1266    }
1267
1268    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1269    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1270
1271    if item.spaces_after_bytes > 0 {
1272        let space_start = item.indent_bytes + item.marker_len;
1273        let space_end = space_start + item.spaces_after_bytes;
1274        if space_end <= item.content.len() {
1275            builder.token(
1276                SyntaxKind::WHITESPACE.into(),
1277                &item.content[space_start..space_end],
1278            );
1279        }
1280    }
1281
1282    // Now start the nested list inside this item
1283    builder.start_node(SyntaxKind::LIST.into());
1284
1285    // Add empty list item to the nested list
1286    builder.start_node(SyntaxKind::LIST_ITEM.into());
1287    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1288
1289    // Extract and emit the newline from original content (lossless)
1290    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1291    if content_start < item.content.len() {
1292        let remaining = &item.content[content_start..];
1293        // Skip the nested marker character (1 byte) and get the newline
1294        if remaining.len() > 1 {
1295            let (_, newline_str) = strip_newline(&remaining[1..]);
1296            if !newline_str.is_empty() {
1297                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1298            }
1299        }
1300    }
1301
1302    builder.finish_node(); // Close nested LIST_ITEM
1303    builder.finish_node(); // Close nested LIST
1304
1305    // Push container for the outer list item
1306    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1307    containers.push(Container::ListItem {
1308        content_col,
1309        buffer: ListItemBuffer::new(),
1310        marker_only: false, // The nested LIST counts as real content.
1311        virtual_marker_space: item.virtual_marker_space,
1312    });
1313}
1314
1315/// Add a list item to the current list.
1316pub(in crate::parser) fn add_list_item(
1317    containers: &mut ContainerStack,
1318    builder: &mut GreenNodeBuilder<'static>,
1319    item: &ListItemEmissionInput<'_>,
1320    config: &ParserOptions,
1321) {
1322    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1323
1324    log::trace!(
1325        "add_list_item: content={:?}, text_to_buffer={:?}",
1326        item.content,
1327        text_to_buffer
1328    );
1329
1330    finish_list_item_with_optional_nested(
1331        containers,
1332        builder,
1333        content_col,
1334        text_to_buffer,
1335        item.virtual_marker_space,
1336        config,
1337    );
1338}
1339
1340/// Finish a list item by either buffering its content or, when the buffered
1341/// content begins with another list marker followed by content, recursively
1342/// opening a nested LIST with another LIST_ITEM. Pushes the appropriate
1343/// containers onto the stack so the caller doesn't need to.
1344fn finish_list_item_with_optional_nested(
1345    containers: &mut ContainerStack,
1346    builder: &mut GreenNodeBuilder<'static>,
1347    content_col: usize,
1348    text_to_buffer: String,
1349    virtual_marker_space: bool,
1350    config: &ParserOptions,
1351) {
1352    // A line whose content is a thematic break (e.g. `* * *`) takes precedence
1353    // over being parsed as a sequence of nested list markers. Both dialects
1354    // agree: `- * * *` is a list item containing a thematic break, not a
1355    // chain of bullets.
1356    let buffered_is_thematic_break =
1357        super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
1358            .is_some();
1359
1360    // Recursive same-line nested list emission applies to both dialects:
1361    // pandoc-markdown and CommonMark agree on the nested LIST_ITEM shape
1362    // for `- - foo`, `1. - 2. foo`, etc. (verified via `pandoc -f markdown
1363    // -t native` and `pandoc -f commonmark -t native`). The companion
1364    // formatter arm in `format_list_item` handles the LIST-first-child
1365    // shape so the round-trip stays idempotent.
1366
1367    if !buffered_is_thematic_break
1368        && let Some(inner_match) = try_parse_list_marker(&text_to_buffer, config)
1369    {
1370        let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1371        let after_inner =
1372            trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
1373        // Recurse only when there is real content after the inner marker.
1374        // The bare-inner-marker case (e.g. `- *`) is handled by the existing
1375        // `add_list_item_with_nested_empty_list` path.
1376        if !after_inner.is_empty() {
1377            // Push outer ListItem with empty buffer.
1378            containers.push(Container::ListItem {
1379                content_col,
1380                buffer: ListItemBuffer::new(),
1381                marker_only: false, // The nested LIST counts as real content.
1382                virtual_marker_space,
1383            });
1384            // Open nested LIST inside the outer LIST_ITEM.
1385            builder.start_node(SyntaxKind::LIST.into());
1386            containers.push(Container::List {
1387                marker: inner_match.marker.clone(),
1388                base_indent_cols: content_col,
1389                has_blank_between_items: false,
1390            });
1391            // Emit nested LIST_ITEM via emit_list_item, then recurse on its
1392            // content for further-nested same-line markers.
1393            let inner_item = ListItemEmissionInput {
1394                content: text_to_buffer.as_str(),
1395                marker_len: inner_match.marker_len,
1396                spaces_after_cols: inner_match.spaces_after_cols,
1397                spaces_after_bytes: inner_match.spaces_after_bytes,
1398                indent_cols: content_col,
1399                indent_bytes: 0,
1400                virtual_marker_space: inner_match.virtual_marker_space,
1401            };
1402            let (inner_content_col, inner_text_to_buffer) = emit_list_item(builder, &inner_item);
1403            finish_list_item_with_optional_nested(
1404                containers,
1405                builder,
1406                inner_content_col,
1407                inner_text_to_buffer,
1408                inner_match.virtual_marker_space,
1409                config,
1410            );
1411            return;
1412        }
1413    }
1414
1415    // Same-line blockquote marker inside a list item: `1. > Blockquote`
1416    // opens a BLOCK_QUOTE inside the LIST_ITEM, with the post-marker text
1417    // becoming the first line of the blockquote's paragraph. Both
1418    // CommonMark and Pandoc-markdown agree on this shape (verified via
1419    // `pandoc -f commonmark` and `pandoc -f markdown`). The companion
1420    // arm in `format_list_item` emits the LIST_MARKER and the BLOCK_QUOTE
1421    // contents on the same output line so the round-trip stays
1422    // idempotent.
1423    if !buffered_is_thematic_break
1424        && text_to_buffer.starts_with('>')
1425        && !text_to_buffer.starts_with(">>")
1426    {
1427        let bytes = text_to_buffer.as_bytes();
1428        let has_trailing_space = bytes.get(1).copied() == Some(b' ');
1429        let content_offset = if has_trailing_space { 2 } else { 1 };
1430        let remaining = &text_to_buffer[content_offset..];
1431
1432        // Push outer ListItem with empty buffer; the inner BLOCK_QUOTE
1433        // counts as real content so `marker_only` is false.
1434        containers.push(Container::ListItem {
1435            content_col,
1436            buffer: ListItemBuffer::new(),
1437            marker_only: false,
1438            virtual_marker_space,
1439        });
1440
1441        // Open BLOCK_QUOTE node inside the LIST_ITEM and emit the marker.
1442        builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
1443        builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
1444        if has_trailing_space {
1445            builder.token(SyntaxKind::WHITESPACE.into(), " ");
1446        }
1447        containers.push(Container::BlockQuote {});
1448
1449        let trimmed = trim_end_newlines(remaining);
1450
1451        // If the BlockQuote content begins with another list marker
1452        // followed by real content, recursively open a nested LIST inside
1453        // the BLOCK_QUOTE. Both Pandoc-markdown and CommonMark agree:
1454        // `- > - foo` produces
1455        // `BulletList [BlockQuote [BulletList [[Plain "foo"]]]]`
1456        // (verified via `pandoc -f markdown` and `pandoc -f commonmark`).
1457        let inner_is_thematic_break =
1458            super::horizontal_rules::try_parse_horizontal_rule(trimmed).is_some();
1459        if !inner_is_thematic_break
1460            && let Some(inner_match) = try_parse_list_marker(remaining, config)
1461        {
1462            let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1463            let after_inner = trim_end_newlines(remaining.get(inner_content_start..).unwrap_or(""));
1464            if !after_inner.is_empty() {
1465                let bq_content_col = content_col + content_offset;
1466                builder.start_node(SyntaxKind::LIST.into());
1467                containers.push(Container::List {
1468                    marker: inner_match.marker.clone(),
1469                    base_indent_cols: bq_content_col,
1470                    has_blank_between_items: false,
1471                });
1472                let inner_item = ListItemEmissionInput {
1473                    content: remaining,
1474                    marker_len: inner_match.marker_len,
1475                    spaces_after_cols: inner_match.spaces_after_cols,
1476                    spaces_after_bytes: inner_match.spaces_after_bytes,
1477                    indent_cols: bq_content_col,
1478                    indent_bytes: 0,
1479                    virtual_marker_space: inner_match.virtual_marker_space,
1480                };
1481                let (inner_content_col, inner_text_to_buffer) =
1482                    emit_list_item(builder, &inner_item);
1483                finish_list_item_with_optional_nested(
1484                    containers,
1485                    builder,
1486                    inner_content_col,
1487                    inner_text_to_buffer,
1488                    inner_match.virtual_marker_space,
1489                    config,
1490                );
1491                return;
1492            }
1493        }
1494
1495        // If there is content after `> `, start a paragraph and buffer
1496        // the first line; subsequent lines flow in via the parser's main
1497        // loop (lazy continuation handles the no-marker continuation
1498        // line in cases like #292).
1499        if !trimmed.is_empty() {
1500            crate::parser::blocks::paragraphs::start_paragraph_if_needed(containers, builder);
1501            crate::parser::blocks::paragraphs::append_paragraph_line(
1502                containers, builder, remaining, config,
1503            );
1504        }
1505        return;
1506    }
1507
1508    let marker_only = text_to_buffer.trim().is_empty();
1509    let mut buffer = ListItemBuffer::new();
1510    if !text_to_buffer.is_empty() {
1511        buffer.push_text(text_to_buffer);
1512    }
1513    containers.push(Container::ListItem {
1514        content_col,
1515        buffer,
1516        marker_only,
1517        virtual_marker_space,
1518    });
1519}
panache_parser/parser/blocks/lists.rs

panache_parser/parser/blocks/
lists.rs