Skip to main content

panache_parser/parser/blocks/
lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{
6    Container, ContainerStack, leading_indent, leading_indent_from,
7};
8use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
9use crate::parser::utils::list_item_buffer::ListItemBuffer;
10
11#[derive(Debug, Clone, PartialEq)]
12pub(crate) enum ListMarker {
13    Bullet(char),
14    Ordered(OrderedMarker),
15}
16
17#[derive(Debug, Clone, PartialEq)]
18pub(crate) enum OrderedMarker {
19    Decimal {
20        number: String,
21        style: ListDelimiter,
22    },
23    Hash,
24    LowerAlpha {
25        letter: char,
26        style: ListDelimiter,
27    },
28    UpperAlpha {
29        letter: char,
30        style: ListDelimiter,
31    },
32    LowerRoman {
33        numeral: String,
34        style: ListDelimiter,
35    },
36    UpperRoman {
37        numeral: String,
38        style: ListDelimiter,
39    },
40    Example {
41        label: Option<String>,
42    },
43}
44
/// How an ordered-list enumerator is delimited in the source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosed in parentheses, as in `(1)`.
    Parens,
}
51
52#[derive(Debug, Clone, PartialEq)]
53pub(crate) struct ListMarkerMatch {
54    pub(crate) marker: ListMarker,
55    pub(crate) marker_len: usize,
56    pub(crate) spaces_after_cols: usize,
57    pub(crate) spaces_after_bytes: usize,
58    /// True when CommonMark's "≥ 5 cols of post-marker whitespace → marker + 1
59    /// virtual space; rest belongs to content" rule fired during marker
60    /// detection. The marker's required 1 col of trailing space was virtually
61    /// absorbed (typically from a tab) rather than consumed as a literal byte;
62    /// the surplus whitespace is left in the post-marker text so block-level
63    /// detection can recognize it as an indented code block.
64    pub(crate) virtual_marker_space: bool,
65}
66
67#[derive(Debug, Clone, Copy)]
68pub(in crate::parser) struct ListItemEmissionInput<'a> {
69    pub content: &'a str,
70    pub marker_len: usize,
71    pub spaces_after_cols: usize,
72    pub spaces_after_bytes: usize,
73    pub indent_cols: usize,
74    pub indent_bytes: usize,
75    pub virtual_marker_space: bool,
76}
77
/// Recognise a Roman numeral at the start of `text`.
///
/// `uppercase` selects which case is accepted (`IVXLCDM` or `ivxlcdm`); the
/// two are never mixed. On success the return value is the byte length of
/// the numeral prefix; `None` means `text` does not start with an acceptable
/// numeral.
///
/// The scan works directly on bytes and never allocates: this runs for every
/// candidate line during fancy-list detection, and all Roman digits are
/// ASCII, so case folding is a plain `b & !0x20`. Callers copy the numeral
/// out of the input only after the trailing delimiter has been confirmed.
///
/// Accepted numerals must satisfy all of:
/// * a lone digit is only `I`, `V` or `X` (the others are left to the
///   alphabetic markers);
/// * `I`, `X`, `C` repeat at most three times in a row and `V`, `L`, `D`
///   never repeat;
/// * a smaller digit directly before a larger one must be one of the pairs
///   `IV`, `IX`, `XL`, `XC`, `CD`, `CM`.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    // Numeric value of an upper-case Roman digit (0 for any other byte).
    fn digit_value(upper: u8) -> u32 {
        match upper {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    let in_alphabet = |b: u8| {
        if uppercase {
            matches!(b, b'I' | b'V' | b'X' | b'L' | b'C' | b'D' | b'M')
        } else {
            matches!(b, b'i' | b'v' | b'x' | b'l' | b'c' | b'd' | b'm')
        }
    };

    // Longest prefix made only of Roman digits in the requested case.
    let len = text.bytes().take_while(|&b| in_alphabet(b)).count();
    let numeral = &text.as_bytes()[..len];

    match numeral {
        [] => return None,
        // A single letter is ambiguous with alphabetic markers; only the
        // most common one-digit numerals are claimed here.
        [only] if !matches!(only & !0x20, b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // Limit runs of identical digits: at most three for I/X/C, no
    // repetition at all for V/L/D, unlimited for M.
    let mut previous = 0u8;
    let mut streak = 0usize;
    for upper in numeral.iter().map(|b| b & !0x20) {
        streak = if upper == previous { streak + 1 } else { 1 };
        previous = upper;
        let longest_allowed = match upper {
            b'I' | b'X' | b'C' => 3,
            b'V' | b'L' | b'D' => 1,
            _ => usize::MAX,
        };
        if streak > longest_allowed {
            return None;
        }
    }

    // Subtractive notation: wherever a digit is followed by a larger one,
    // the pair has to be one of the six standard combinations.
    let subtractive_ok = numeral.windows(2).all(|pair| {
        let (curr, next) = (pair[0] & !0x20, pair[1] & !0x20);
        digit_value(curr) >= digit_value(next)
            || matches!(
                (curr, next),
                (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
            )
    });

    subtractive_ok.then_some(len)
}
168
169/// Compute (spaces_after_cols, spaces_after_bytes, virtual_marker_space) for a
170/// post-marker string starting at column `marker_end_col` of the source line.
171///
172/// Implements CommonMark §5.2 rule #2: when the effective column-width of the
173/// post-marker whitespace (counted with tabs expanding from `marker_end_col`)
174/// is ≥ 5 and there is non-empty content after it, the list item's content
175/// column is `marker_end_col + 1` (the marker plus exactly one — possibly
176/// virtual — space). The surplus whitespace is left in the post-marker text
177/// so block-level dispatch can recognize it as an indented code block.
178///
179/// In the rule case, when the first byte is a tab whose source-column span
180/// exceeds 1, no bytes are consumed (the tab stays in content) and
181/// `virtual_marker_space` is true. Otherwise the byte count describes the
182/// literal whitespace consumed as marker space.
183fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
184    let (effective_cols, n_bytes) = leading_indent_from(after_marker, marker_end_col);
185    let after_ws = &after_marker[n_bytes..];
186    let has_content = !trim_end_newlines(after_ws).is_empty();
187    if has_content && effective_cols >= 5 {
188        let bytes = match after_marker.as_bytes().first() {
189            Some(b' ') => 1,
190            Some(b'\t') => {
191                let span = 4 - (marker_end_col % 4);
192                if span == 1 { 1 } else { 0 }
193            }
194            _ => 0,
195        };
196        (1, bytes, bytes == 0)
197    } else {
198        (effective_cols, n_bytes, false)
199    }
200}
201
202pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
203    // Trailing newlines should not block bare-marker detection; the line `*\n`
204    // is a bare bullet marker and the post-marker text is logically empty.
205    let line = trim_end_newlines(line);
206    let (_indent_cols, indent_bytes) = leading_indent(line);
207    let trimmed = &line[indent_bytes..];
208
209    // Try bullet markers (including task lists)
210    if let Some(ch) = trimmed.chars().next()
211        && matches!(ch, '*' | '+' | '-')
212    {
213        let after_marker = &trimmed[1..];
214
215        // Check for task list: [ ] or [x] or [X]
216        let trimmed_after = after_marker.trim_start();
217        let is_task = trimmed_after.starts_with('[')
218            && trimmed_after.len() >= 3
219            && matches!(
220                trimmed_after.chars().nth(1),
221                Some(' ') | Some('x') | Some('X')
222            )
223            && trimmed_after.chars().nth(2) == Some(']');
224
225        // Must be followed by whitespace (or be task list)
226        if after_marker.starts_with(' ')
227            || after_marker.starts_with('\t')
228            || after_marker.is_empty()
229            || is_task
230        {
231            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
232                marker_spaces_after(after_marker, _indent_cols + 1);
233            return Some(ListMarkerMatch {
234                marker: ListMarker::Bullet(ch),
235                marker_len: 1,
236                spaces_after_cols,
237                spaces_after_bytes,
238                virtual_marker_space,
239            });
240        }
241    }
242
243    // Try ordered markers
244    if config.extensions.fancy_lists
245        && let Some(after_marker) = trimmed.strip_prefix("#.")
246        && (after_marker.starts_with(' ')
247            || after_marker.starts_with('\t')
248            || after_marker.is_empty())
249    {
250        let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
251            marker_spaces_after(after_marker, _indent_cols + 2);
252        return Some(ListMarkerMatch {
253            marker: ListMarker::Ordered(OrderedMarker::Hash),
254            marker_len: 2,
255            spaces_after_cols,
256            spaces_after_bytes,
257            virtual_marker_space,
258        });
259    }
260
261    // Try example lists: (@) or (@label)
262    if config.extensions.example_lists
263        && let Some(rest) = trimmed.strip_prefix("(@")
264    {
265        // Check if it has a label or is just (@)
266        let label_end = rest
267            .chars()
268            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
269            .count();
270
271        // Must be followed by ')'
272        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
273            let label = if label_end > 0 {
274                Some(rest[..label_end].to_string())
275            } else {
276                None
277            };
278
279            let after_marker = &rest[label_end + 1..];
280            if after_marker.starts_with(' ')
281                || after_marker.starts_with('\t')
282                || after_marker.is_empty()
283            {
284                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
285                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
286                    marker_spaces_after(after_marker, _indent_cols + marker_len);
287                return Some(ListMarkerMatch {
288                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
289                    marker_len,
290                    spaces_after_cols,
291                    spaces_after_bytes,
292                    virtual_marker_space,
293                });
294            }
295        }
296    }
297
298    // Try parenthesized markers: (2), (a), (ii)
299    if let Some(rest) = trimmed.strip_prefix('(') {
300        if config.extensions.fancy_lists {
301            // Try decimal: (2)
302            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
303            if digit_count > 0
304                && rest.len() > digit_count
305                && rest.chars().nth(digit_count) == Some(')')
306            {
307                let number = &rest[..digit_count];
308                let after_marker = &rest[digit_count + 1..];
309                if after_marker.starts_with(' ')
310                    || after_marker.starts_with('\t')
311                    || after_marker.is_empty()
312                {
313                    let marker_len = 2 + digit_count;
314                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
315                        marker_spaces_after(after_marker, _indent_cols + marker_len);
316                    return Some(ListMarkerMatch {
317                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
318                            number: number.to_string(),
319                            style: ListDelimiter::Parens,
320                        }),
321                        marker_len,
322                        spaces_after_cols,
323                        spaces_after_bytes,
324                        virtual_marker_space,
325                    });
326                }
327            }
328        }
329
330        // Try fancy lists if enabled (parenthesized markers)
331        if config.extensions.fancy_lists {
332            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
333
334            // Try lowercase Roman: (ii)
335            if let Some(len) = try_parse_roman_numeral(rest, false)
336                && rest.len() > len
337                && rest.as_bytes()[len] == b')'
338            {
339                let after_marker = &rest[len + 1..];
340                if after_marker.starts_with(' ')
341                    || after_marker.starts_with('\t')
342                    || after_marker.is_empty()
343                {
344                    let marker_len = len + 2;
345                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
346                        marker_spaces_after(after_marker, _indent_cols + marker_len);
347                    return Some(ListMarkerMatch {
348                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
349                            numeral: rest[..len].to_string(),
350                            style: ListDelimiter::Parens,
351                        }),
352                        marker_len,
353                        spaces_after_cols,
354                        spaces_after_bytes,
355                        virtual_marker_space,
356                    });
357                }
358            }
359
360            // Try uppercase Roman: (II)
361            if let Some(len) = try_parse_roman_numeral(rest, true)
362                && rest.len() > len
363                && rest.as_bytes()[len] == b')'
364            {
365                let after_marker = &rest[len + 1..];
366                if after_marker.starts_with(' ')
367                    || after_marker.starts_with('\t')
368                    || after_marker.is_empty()
369                {
370                    let marker_len = len + 2;
371                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
372                        marker_spaces_after(after_marker, _indent_cols + marker_len);
373                    return Some(ListMarkerMatch {
374                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
375                            numeral: rest[..len].to_string(),
376                            style: ListDelimiter::Parens,
377                        }),
378                        marker_len,
379                        spaces_after_cols,
380                        spaces_after_bytes,
381                        virtual_marker_space,
382                    });
383                }
384            }
385
386            // Try lowercase letter: (a)
387            if let Some(ch) = rest.chars().next()
388                && ch.is_ascii_lowercase()
389                && rest.len() > 1
390                && rest.chars().nth(1) == Some(')')
391            {
392                let after_marker = &rest[2..];
393                if after_marker.starts_with(' ')
394                    || after_marker.starts_with('\t')
395                    || after_marker.is_empty()
396                {
397                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
398                        marker_spaces_after(after_marker, _indent_cols + 3);
399                    return Some(ListMarkerMatch {
400                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
401                            letter: ch,
402                            style: ListDelimiter::Parens,
403                        }),
404                        marker_len: 3,
405                        spaces_after_cols,
406                        spaces_after_bytes,
407                        virtual_marker_space,
408                    });
409                }
410            }
411
412            // Try uppercase letter: (A)
413            if let Some(ch) = rest.chars().next()
414                && ch.is_ascii_uppercase()
415                && rest.len() > 1
416                && rest.chars().nth(1) == Some(')')
417            {
418                let after_marker = &rest[2..];
419                if after_marker.starts_with(' ')
420                    || after_marker.starts_with('\t')
421                    || after_marker.is_empty()
422                {
423                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
424                        marker_spaces_after(after_marker, _indent_cols + 3);
425                    return Some(ListMarkerMatch {
426                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
427                            letter: ch,
428                            style: ListDelimiter::Parens,
429                        }),
430                        marker_len: 3,
431                        spaces_after_cols,
432                        spaces_after_bytes,
433                        virtual_marker_space,
434                    });
435                }
436            }
437        }
438    }
439
440    // Try decimal numbers: 1. or 1)
441    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
442    if digit_count > 0 && trimmed.len() > digit_count {
443        // CommonMark restricts ordered list markers to 1-9 digits (spec §5.2).
444        // Pandoc-markdown accepts arbitrary digit counts.
445        if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
446            return None;
447        }
448
449        let number = &trimmed[..digit_count];
450        let delim = trimmed.chars().nth(digit_count);
451
452        let (style, marker_len) = match delim {
453            Some('.') => (ListDelimiter::Period, digit_count + 1),
454            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
455            _ => return None,
456        };
457        // CommonMark §5.2: decimal `1)` markers are part of the core grammar.
458        // Pandoc-markdown gates `)`-style ordered markers behind `fancy_lists`.
459        if style == ListDelimiter::RightParen
460            && !config.extensions.fancy_lists
461            && config.dialect != crate::Dialect::CommonMark
462        {
463            return None;
464        }
465
466        let after_marker = &trimmed[marker_len..];
467        if after_marker.starts_with(' ')
468            || after_marker.starts_with('\t')
469            || after_marker.is_empty()
470        {
471            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
472                marker_spaces_after(after_marker, _indent_cols + marker_len);
473            return Some(ListMarkerMatch {
474                marker: ListMarker::Ordered(OrderedMarker::Decimal {
475                    number: number.to_string(),
476                    style,
477                }),
478                marker_len,
479                spaces_after_cols,
480                spaces_after_bytes,
481                virtual_marker_space,
482            });
483        }
484    }
485
486    // Try fancy lists if enabled (non-parenthesized)
487    if config.extensions.fancy_lists {
488        // Try Roman numerals first, as they may overlap with letters
489
490        // Try lowercase Roman: i. or ii)
491        if let Some(len) = try_parse_roman_numeral(trimmed, false)
492            && trimmed.len() > len
493            && let delim = trimmed.as_bytes()[len]
494            && (delim == b'.' || delim == b')')
495        {
496            let style = if delim == b'.' {
497                ListDelimiter::Period
498            } else {
499                ListDelimiter::RightParen
500            };
501            let marker_len = len + 1;
502
503            let after_marker = &trimmed[marker_len..];
504            if after_marker.starts_with(' ')
505                || after_marker.starts_with('\t')
506                || after_marker.is_empty()
507            {
508                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
509                    marker_spaces_after(after_marker, _indent_cols + marker_len);
510                return Some(ListMarkerMatch {
511                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
512                        numeral: trimmed[..len].to_string(),
513                        style,
514                    }),
515                    marker_len,
516                    spaces_after_cols,
517                    spaces_after_bytes,
518                    virtual_marker_space,
519                });
520            }
521        }
522
523        // Try uppercase Roman: I. or II)
524        if let Some(len) = try_parse_roman_numeral(trimmed, true)
525            && trimmed.len() > len
526            && let delim = trimmed.as_bytes()[len]
527            && (delim == b'.' || delim == b')')
528        {
529            let style = if delim == b'.' {
530                ListDelimiter::Period
531            } else {
532                ListDelimiter::RightParen
533            };
534            let marker_len = len + 1;
535
536            let after_marker = &trimmed[marker_len..];
537            if after_marker.starts_with(' ')
538                || after_marker.starts_with('\t')
539                || after_marker.is_empty()
540            {
541                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
542                    marker_spaces_after(after_marker, _indent_cols + marker_len);
543                return Some(ListMarkerMatch {
544                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
545                        numeral: trimmed[..len].to_string(),
546                        style,
547                    }),
548                    marker_len,
549                    spaces_after_cols,
550                    spaces_after_bytes,
551                    virtual_marker_space,
552                });
553            }
554        }
555
556        // Try lowercase letter: a. or a)
557        if let Some(ch) = trimmed.chars().next()
558            && ch.is_ascii_lowercase()
559            && trimmed.len() > 1
560            && let Some(delim) = trimmed.chars().nth(1)
561            && (delim == '.' || delim == ')')
562        {
563            let style = if delim == '.' {
564                ListDelimiter::Period
565            } else {
566                ListDelimiter::RightParen
567            };
568            let marker_len = 2;
569
570            let after_marker = &trimmed[marker_len..];
571            if after_marker.starts_with(' ')
572                || after_marker.starts_with('\t')
573                || after_marker.is_empty()
574            {
575                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
576                    marker_spaces_after(after_marker, _indent_cols + marker_len);
577                return Some(ListMarkerMatch {
578                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
579                    marker_len,
580                    spaces_after_cols,
581                    spaces_after_bytes,
582                    virtual_marker_space,
583                });
584            }
585        }
586
587        // Try uppercase letter: A. or A)
588        if let Some(ch) = trimmed.chars().next()
589            && ch.is_ascii_uppercase()
590            && trimmed.len() > 1
591            && let Some(delim) = trimmed.chars().nth(1)
592            && (delim == '.' || delim == ')')
593        {
594            let style = if delim == '.' {
595                ListDelimiter::Period
596            } else {
597                ListDelimiter::RightParen
598            };
599            let marker_len = 2;
600
601            let after_marker = &trimmed[marker_len..];
602            // Special rule: uppercase letter with period needs 2 spaces minimum
603            let min_spaces = if delim == '.' { 2 } else { 1 };
604            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
605
606            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
607                && effective_cols >= min_spaces
608            {
609                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
610                    marker_spaces_after(after_marker, _indent_cols + marker_len);
611                return Some(ListMarkerMatch {
612                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
613                    marker_len,
614                    spaces_after_cols,
615                    spaces_after_bytes,
616                    virtual_marker_space,
617                });
618            }
619        }
620    }
621
622    None
623}
624
625pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
626    match (a, b) {
627        // CommonMark §5.3: bullet list markers `-`, `+`, `*` are *distinct*
628        // bullet types — switching from one to another starts a new list.
629        // Pandoc-markdown treats them as interchangeable: any bullet
630        // continues an open bullet list. Verified with pandoc against
631        // `- foo\n- bar\n+ baz\n` (#301).
632        (ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
633            crate::Dialect::CommonMark => ca == cb,
634            _ => true,
635        },
636        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
637            true
638        }
639        (
640            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
641            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
642        ) => s1 == s2,
643        (
644            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
645            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
646        ) => s1 == s2,
647        (
648            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
649            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
650        ) => s1 == s2,
651        (
652            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
653            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
654        ) => s1 == s2,
655        (
656            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
657            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
658        ) => s1 == s2,
659        (
660            ListMarker::Ordered(OrderedMarker::Example { .. }),
661            ListMarker::Ordered(OrderedMarker::Example { .. }),
662        ) => true, // All example list items match each other
663        _ => false,
664    }
665}
666
667/// Emit a list item node to the builder (marker and whitespace only).
668/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
669/// added to the list item buffer for later inline parsing.
670pub(in crate::parser) fn emit_list_item(
671    builder: &mut GreenNodeBuilder<'static>,
672    item: &ListItemEmissionInput<'_>,
673) -> (usize, String) {
674    builder.start_node(SyntaxKind::LIST_ITEM.into());
675
676    // Emit leading indentation for lossless parsing
677    if item.indent_bytes > 0 {
678        builder.token(
679            SyntaxKind::WHITESPACE.into(),
680            &item.content[..item.indent_bytes],
681        );
682    }
683
684    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
685    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
686
687    if item.spaces_after_bytes > 0 {
688        let space_start = item.indent_bytes + item.marker_len;
689        let space_end = space_start + item.spaces_after_bytes;
690        if space_end <= item.content.len() {
691            builder.token(
692                SyntaxKind::WHITESPACE.into(),
693                &item.content[space_start..space_end],
694            );
695        }
696    }
697
698    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
699    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
700
701    // Extract text content to be buffered (instead of emitting it directly).
702    // If the item starts with a task checkbox, emit it as a dedicated token so it
703    // doesn't get parsed as a link.
704    let text_to_buffer = if content_start < item.content.len() {
705        let rest = &item.content[content_start..];
706        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
707            && rest
708                .as_bytes()
709                .get(3)
710                .is_some_and(|b| (*b as char).is_whitespace())
711        {
712            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
713            rest[3..].to_string()
714        } else {
715            rest.to_string()
716        }
717    } else {
718        String::new()
719    };
720
721    (content_col, text_to_buffer)
722}
723
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// A bullet is recognised whether a space or a tab follows it.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` enabled, lowercase letters work as markers with
    /// both the period and the right-parenthesis delimiter.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        // Period-delimited first, then right-paren-delimited.
        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(&line, &config).is_some(),
                "{marker} should parse"
            );
        }
    }
}
766
/// Fancy-list markers continue a list when kind and delimiter style agree,
/// regardless of the enumerator value; a different delimiter breaks it.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::{Period, RightParen};
    use ListMarker::Ordered;
    use OrderedMarker::{LowerAlpha, LowerRoman};

    let alpha = |letter, style| Ordered(LowerAlpha { letter, style });
    let roman = |numeral: &str, style| {
        Ordered(LowerRoman {
            numeral: numeral.to_string(),
            style,
        })
    };

    // Same kind, same style: the letter itself does not matter.
    let a_period = alpha('a', Period);
    let b_period = alpha('b', Period);
    assert!(
        markers_match(&a_period, &b_period, crate::Dialect::Pandoc),
        "a. and b. should match"
    );

    // Same for Roman numerals.
    let i_period = roman("i", Period);
    let ii_period = roman("ii", Period);
    assert!(
        markers_match(&i_period, &ii_period, crate::Dialect::Pandoc),
        "i. and ii. should match"
    );

    // Same kind but a different delimiter style is a different list.
    let a_paren = alpha('a', RightParen);
    assert!(
        !markers_match(&a_period, &a_paren, crate::Dialect::Pandoc),
        "a. and a) should not match"
    );
}
810
// Bullet matching differs by dialect: Pandoc treats every bullet character
// as interchangeable, CommonMark requires the same character.
#[test]
fn markers_match_bullet_dialect_split() {
    use ListMarker::*;

    let same_list = |a: char, b: char, dialect| markers_match(&Bullet(a), &Bullet(b), dialect);

    // Pandoc: any bullet matches any bullet (same list).
    assert!(same_list('-', '+', crate::Dialect::Pandoc));

    // CommonMark: bullets match only when the marker character is the same.
    assert!(same_list('-', '-', crate::Dialect::CommonMark));
    assert!(!same_list('-', '+', crate::Dialect::CommonMark));
    assert!(!same_list('*', '-', crate::Dialect::CommonMark));
}
837
// With `fancy_lists` enabled, multi-character lowercase Roman numerals
// followed by a period are recognised as list markers.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    // Test various Roman numerals
    for numeral in ["iv", "v", "vi", "vii", "viii", "ix", "x"] {
        let line = format!("{numeral}. item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{numeral}. should parse"
        );
    }
}
873
// Example-list markers (`(@)` and `(@label)`) parse only while the
// `example_lists` extension is switched on.
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    // The unlabeled form first, then several labeled forms.
    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }

    // Test with extension disabled
    let extensions = crate::options::Extensions {
        example_lists: false,
        ..Default::default()
    };
    let disabled_config = ParserOptions {
        extensions,
        ..Default::default()
    };
    assert!(
        try_parse_list_marker("(@) item", &disabled_config).is_none(),
        "(@) should not parse when extension disabled"
    );
}
912
// A deeply indented ordered item that drifts between two open lists must
// stay with the enclosing list rather than jump to the lower-indented one.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let list_at = |base_indent_cols| Container::List {
        marker: lower_roman("ii"),
        base_indent_cols,
        has_blank_between_items: false,
    };

    // Stack layout: list (base 8) -> item -> list (base 6).
    let mut containers = ContainerStack::new();
    containers.push(list_at(8));
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(list_at(6));

    // With deep ordered drift (indent 7), we should keep the enclosing level
    // (base indent 8), not re-associate to the nearest lower sibling level (6).
    let level = find_matching_list_level(
        &containers,
        &lower_roman("iii"),
        7,
        crate::Dialect::Pandoc,
    );
    assert_eq!(level, Some(0));
}
955
// When one open list sits at exactly the item's indent, that list wins even
// though another ordered list is within drift range.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    // Two open lists, at base indents 8 and 6 (stack indices 0 and 1).
    let mut containers = ContainerStack::new();
    for base_indent_cols in [8, 6] {
        containers.push(Container::List {
            marker: lower_roman("ii"),
            base_indent_cols,
            has_blank_between_items: false,
        });
    }

    let level = find_matching_list_level(
        &containers,
        &lower_roman("iii"),
        6,
        crate::Dialect::Pandoc,
    );
    assert_eq!(level, Some(1));
}
990
// A bullet item whose only content is another bullet marker (`- *`, `- +`,
// `- -`) must parse as an outer item wrapping a nested list with one empty
// item, not as plain text.
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();

    // Test all three bullet marker combinations as nested lists
    for inner in ['*', '+', '-'] {
        let desc = format!("- {inner}");
        let input = format!("{desc}\n");
        let tree = parse(input.as_str(), Some(config.clone()));

        // tree IS the DOCUMENT node
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // Should have a LIST as first child of DOCUMENT
        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node");
        };

        // Outer list should have a LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        // Outer list item should contain a nested LIST (not PLAIN with TEXT)
        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            );
        };

        // Nested list should have a LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        // Nested list item should be empty (no PLAIN or TEXT content)
        let no_plain = nested_item
            .children()
            .all(|n| n.kind() != SyntaxKind::PLAIN);
        assert!(
            no_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
1048
1049// Helper functions for list management in Parser
1050
1051/// Check if we're in any list.
1052pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
1053    containers
1054        .stack
1055        .iter()
1056        .any(|c| matches!(c, Container::List { .. }))
1057}
1058
1059/// Check if we're in a list inside a blockquote.
1060pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
1061    let mut seen_blockquote = false;
1062    for c in &containers.stack {
1063        if matches!(c, Container::BlockQuote { .. }) {
1064            seen_blockquote = true;
1065        }
1066        if seen_blockquote && matches!(c, Container::List { .. }) {
1067            return true;
1068        }
1069    }
1070    false
1071}
1072
1073/// Find matching list level for a marker with the given indent.
1074pub(in crate::parser) fn find_matching_list_level(
1075    containers: &ContainerStack,
1076    marker: &ListMarker,
1077    indent_cols: usize,
1078    dialect: crate::Dialect,
1079) -> Option<usize> {
1080    // Search from deepest (last) to shallowest (first)
1081    // But for shallow items (0-3 indent), prefer matching at the closest base indent
1082    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)
1083
1084    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
1085    let mut best_above_match: Option<(usize, usize)> = None; // (index, delta = base - indent), ordered deep only
1086
1087    for (i, c) in containers.stack.iter().enumerate().rev() {
1088        if let Container::List {
1089            marker: list_marker,
1090            base_indent_cols,
1091            ..
1092        } = c
1093            && markers_match(marker, list_marker, dialect)
1094        {
1095            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
1096                // Deep indentation:
1097                // - bullets stay directional to preserve nesting boundaries
1098                // - ordered markers allow small symmetric drift to keep
1099                //   marker-width-aligned lists (i./ii./iii.) at one level
1100                match (marker, list_marker) {
1101                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1102                        indent_cols.abs_diff(*base_indent_cols) <= 3
1103                    }
1104                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
1105                }
1106            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
1107                // One shallow, one deep:
1108                // - ordered markers still allow symmetric drift so aligned roman
1109                //   markers (e.g. 3/4/5 spaces for i./ii./iii.) stay at one level
1110                // - bullets remain directional to preserve nesting boundaries
1111                match (marker, list_marker) {
1112                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1113                        indent_cols.abs_diff(*base_indent_cols) <= 3
1114                    }
1115                    _ => false,
1116                }
1117            } else {
1118                // Both at shallow indentation (0-3)
1119                // Allow items within 3 spaces
1120                indent_cols.abs_diff(*base_indent_cols) <= 3
1121            };
1122
1123            if matches {
1124                let distance = indent_cols.abs_diff(*base_indent_cols);
1125                let base_leq_indent = *base_indent_cols <= indent_cols;
1126
1127                // For deep ordered lists, avoid "nearest below" re-association caused by
1128                // formatter alignment shifts (e.g. i./ii./iii. becoming 6/7/8-space indents).
1129                // Prefer matching the nearest enclosing level whose base indent is >= current.
1130                if is_deep_ordered
1131                    && matches!(
1132                        (marker, list_marker),
1133                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
1134                    )
1135                    && *base_indent_cols >= indent_cols
1136                {
1137                    let delta = *base_indent_cols - indent_cols;
1138                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
1139                        best_above_match = Some((i, delta));
1140                    }
1141                }
1142
1143                if let Some((_, best_dist, best_base_leq)) = best_match {
1144                    if distance < best_dist
1145                        || (distance == best_dist && base_leq_indent && !best_base_leq)
1146                    {
1147                        best_match = Some((i, distance, base_leq_indent));
1148                    }
1149                } else {
1150                    best_match = Some((i, distance, base_leq_indent));
1151                }
1152
1153                // If we found an exact match, return immediately
1154                if distance == 0 {
1155                    return Some(i);
1156                }
1157            }
1158        }
1159    }
1160
1161    if let Some((index, _)) = best_above_match {
1162        return Some(index);
1163    }
1164
1165    best_match.map(|(i, _, _)| i)
1166}
1167
1168/// Start a nested list within an existing list item.
1169pub(in crate::parser) fn start_nested_list(
1170    containers: &mut ContainerStack,
1171    builder: &mut GreenNodeBuilder<'static>,
1172    marker: &ListMarker,
1173    item: &ListItemEmissionInput<'_>,
1174    indent_to_emit: Option<&str>,
1175    config: &ParserOptions,
1176) {
1177    // Emit the indent if needed
1178    if let Some(indent_str) = indent_to_emit {
1179        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1180    }
1181
1182    // Start nested list
1183    builder.start_node(SyntaxKind::LIST.into());
1184    containers.push(Container::List {
1185        marker: marker.clone(),
1186        base_indent_cols: item.indent_cols,
1187        has_blank_between_items: false,
1188    });
1189
1190    // Add the nested list item
1191    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1192    finish_list_item_with_optional_nested(
1193        containers,
1194        builder,
1195        content_col,
1196        text_to_buffer,
1197        item.virtual_marker_space,
1198        config,
1199    );
1200}
1201
1202/// Checks if the content after a list marker is exactly another bullet marker.
1203/// Returns the nested bullet marker character if detected.
1204pub(in crate::parser) fn is_content_nested_bullet_marker(
1205    content: &str,
1206    marker_len: usize,
1207    spaces_after_bytes: usize,
1208) -> Option<char> {
1209    let (_, indent_bytes) = leading_indent(content);
1210    let content_start = indent_bytes + marker_len + spaces_after_bytes;
1211
1212    if content_start >= content.len() {
1213        return None;
1214    }
1215
1216    let remaining = &content[content_start..];
1217    let (text_part, _) = strip_newline(remaining);
1218    let trimmed = text_part.trim();
1219
1220    // Check if it's exactly one of the bullet marker characters
1221    if trimmed.len() == 1 {
1222        let ch = trimmed.chars().next().unwrap();
1223        if matches!(ch, '*' | '+' | '-') {
1224            return Some(ch);
1225        }
1226    }
1227
1228    None
1229}
1230
1231/// Add a list item that contains a nested empty list (for cases like `- *`).
1232/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
1233pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1234    containers: &mut ContainerStack,
1235    builder: &mut GreenNodeBuilder<'static>,
1236    item: &ListItemEmissionInput<'_>,
1237    nested_marker: char,
1238) {
1239    // First, emit the outer list item (just marker + whitespace)
1240    builder.start_node(SyntaxKind::LIST_ITEM.into());
1241
1242    // Emit leading indentation for lossless parsing
1243    if item.indent_bytes > 0 {
1244        builder.token(
1245            SyntaxKind::WHITESPACE.into(),
1246            &item.content[..item.indent_bytes],
1247        );
1248    }
1249
1250    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1251    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1252
1253    if item.spaces_after_bytes > 0 {
1254        let space_start = item.indent_bytes + item.marker_len;
1255        let space_end = space_start + item.spaces_after_bytes;
1256        if space_end <= item.content.len() {
1257            builder.token(
1258                SyntaxKind::WHITESPACE.into(),
1259                &item.content[space_start..space_end],
1260            );
1261        }
1262    }
1263
1264    // Now start the nested list inside this item
1265    builder.start_node(SyntaxKind::LIST.into());
1266
1267    // Add empty list item to the nested list
1268    builder.start_node(SyntaxKind::LIST_ITEM.into());
1269    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1270
1271    // Extract and emit the newline from original content (lossless)
1272    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1273    if content_start < item.content.len() {
1274        let remaining = &item.content[content_start..];
1275        // Skip the nested marker character (1 byte) and get the newline
1276        if remaining.len() > 1 {
1277            let (_, newline_str) = strip_newline(&remaining[1..]);
1278            if !newline_str.is_empty() {
1279                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1280            }
1281        }
1282    }
1283
1284    builder.finish_node(); // Close nested LIST_ITEM
1285    builder.finish_node(); // Close nested LIST
1286
1287    // Push container for the outer list item
1288    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1289    containers.push(Container::ListItem {
1290        content_col,
1291        buffer: ListItemBuffer::new(),
1292        marker_only: false, // The nested LIST counts as real content.
1293        virtual_marker_space: item.virtual_marker_space,
1294    });
1295}
1296
1297/// Add a list item to the current list.
1298pub(in crate::parser) fn add_list_item(
1299    containers: &mut ContainerStack,
1300    builder: &mut GreenNodeBuilder<'static>,
1301    item: &ListItemEmissionInput<'_>,
1302    config: &ParserOptions,
1303) {
1304    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1305
1306    log::trace!(
1307        "add_list_item: content={:?}, text_to_buffer={:?}",
1308        item.content,
1309        text_to_buffer
1310    );
1311
1312    finish_list_item_with_optional_nested(
1313        containers,
1314        builder,
1315        content_col,
1316        text_to_buffer,
1317        item.virtual_marker_space,
1318        config,
1319    );
1320}
1321
1322/// Finish a list item by either buffering its content or, when the buffered
1323/// content begins with another list marker followed by content, recursively
1324/// opening a nested LIST with another LIST_ITEM. Pushes the appropriate
1325/// containers onto the stack so the caller doesn't need to.
1326fn finish_list_item_with_optional_nested(
1327    containers: &mut ContainerStack,
1328    builder: &mut GreenNodeBuilder<'static>,
1329    content_col: usize,
1330    text_to_buffer: String,
1331    virtual_marker_space: bool,
1332    config: &ParserOptions,
1333) {
1334    // A line whose content is a thematic break (e.g. `* * *`) takes precedence
1335    // over being parsed as a sequence of nested list markers. Both dialects
1336    // agree: `- * * *` is a list item containing a thematic break, not a
1337    // chain of bullets.
1338    let buffered_is_thematic_break =
1339        super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
1340            .is_some();
1341
1342    // Recursive same-line nested list emission is gated to CommonMark.
1343    // Pandoc-markdown also nests in this position (e.g. `- b. foo` is a
1344    // bullet wrapping an alpha-ordered list), but the formatter does not
1345    // yet support emitting an outer LIST_ITEM whose only child is a
1346    // nested LIST, so producing the nested CST under Pandoc breaks
1347    // formatter idempotency. Tracked as future work.
1348    let dialect_allows_nested = config.dialect == crate::Dialect::CommonMark;
1349
1350    if dialect_allows_nested
1351        && !buffered_is_thematic_break
1352        && let Some(inner_match) = try_parse_list_marker(&text_to_buffer, config)
1353    {
1354        let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1355        let after_inner =
1356            trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
1357        // Recurse only when there is real content after the inner marker.
1358        // The bare-inner-marker case (e.g. `- *`) is handled by the existing
1359        // `add_list_item_with_nested_empty_list` path.
1360        if !after_inner.is_empty() {
1361            // Push outer ListItem with empty buffer.
1362            containers.push(Container::ListItem {
1363                content_col,
1364                buffer: ListItemBuffer::new(),
1365                marker_only: false, // The nested LIST counts as real content.
1366                virtual_marker_space,
1367            });
1368            // Open nested LIST inside the outer LIST_ITEM.
1369            builder.start_node(SyntaxKind::LIST.into());
1370            containers.push(Container::List {
1371                marker: inner_match.marker.clone(),
1372                base_indent_cols: content_col,
1373                has_blank_between_items: false,
1374            });
1375            // Emit nested LIST_ITEM via emit_list_item, then recurse on its
1376            // content for further-nested same-line markers.
1377            let inner_item = ListItemEmissionInput {
1378                content: text_to_buffer.as_str(),
1379                marker_len: inner_match.marker_len,
1380                spaces_after_cols: inner_match.spaces_after_cols,
1381                spaces_after_bytes: inner_match.spaces_after_bytes,
1382                indent_cols: content_col,
1383                indent_bytes: 0,
1384                virtual_marker_space: inner_match.virtual_marker_space,
1385            };
1386            let (inner_content_col, inner_text_to_buffer) = emit_list_item(builder, &inner_item);
1387            finish_list_item_with_optional_nested(
1388                containers,
1389                builder,
1390                inner_content_col,
1391                inner_text_to_buffer,
1392                inner_match.virtual_marker_space,
1393                config,
1394            );
1395            return;
1396        }
1397    }
1398
1399    // Same-line blockquote marker inside a list item: `1. > Blockquote`
1400    // opens a BLOCK_QUOTE inside the LIST_ITEM, with the post-marker text
1401    // becoming the first line of the blockquote's paragraph. Both
1402    // CommonMark and Pandoc-markdown agree on this shape (verified via
1403    // `pandoc -f commonmark` and `pandoc -f markdown`), but emission is
1404    // gated to CommonMark for now: the formatter does not yet preserve a
1405    // LIST_ITEM whose first structural child is a BLOCK_QUOTE through a
1406    // round-trip (the LIST_MARKER gets dropped on re-format, breaking
1407    // idempotency). Gating mirrors the same-line nested LIST gate above
1408    // and is tracked alongside that formatter work.
1409    if dialect_allows_nested
1410        && !buffered_is_thematic_break
1411        && text_to_buffer.starts_with('>')
1412        && !text_to_buffer.starts_with(">>")
1413    {
1414        let bytes = text_to_buffer.as_bytes();
1415        let has_trailing_space = bytes.get(1).copied() == Some(b' ');
1416        let content_offset = if has_trailing_space { 2 } else { 1 };
1417        let remaining = &text_to_buffer[content_offset..];
1418
1419        // Push outer ListItem with empty buffer; the inner BLOCK_QUOTE
1420        // counts as real content so `marker_only` is false.
1421        containers.push(Container::ListItem {
1422            content_col,
1423            buffer: ListItemBuffer::new(),
1424            marker_only: false,
1425            virtual_marker_space,
1426        });
1427
1428        // Open BLOCK_QUOTE node inside the LIST_ITEM and emit the marker.
1429        builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
1430        builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
1431        if has_trailing_space {
1432            builder.token(SyntaxKind::WHITESPACE.into(), " ");
1433        }
1434        containers.push(Container::BlockQuote {});
1435
1436        // If there is content after `> `, start a paragraph and buffer
1437        // the first line; subsequent lines flow in via the parser's main
1438        // loop (lazy continuation handles the no-marker continuation
1439        // line in cases like #292).
1440        let trimmed = trim_end_newlines(remaining);
1441        if !trimmed.is_empty() {
1442            crate::parser::blocks::paragraphs::start_paragraph_if_needed(containers, builder);
1443            crate::parser::blocks::paragraphs::append_paragraph_line(
1444                containers, builder, remaining, config,
1445            );
1446        }
1447        return;
1448    }
1449
1450    let marker_only = text_to_buffer.trim().is_empty();
1451    let mut buffer = ListItemBuffer::new();
1452    if !text_to_buffer.is_empty() {
1453        buffer.push_text(text_to_buffer);
1454    }
1455    containers.push(Container::ListItem {
1456        content_col,
1457        buffer,
1458        marker_only,
1459        virtual_marker_space,
1460    });
1461}