Skip to main content

panache_parser/parser/blocks/
lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{
6    Container, ContainerStack, leading_indent, leading_indent_from,
7};
8use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
9use crate::parser::utils::list_item_buffer::ListItemBuffer;
10
/// Signal returned by `add_list_item` / `finish_list_item_with_optional_nested`
/// so the caller can decide how to handle leftover first-line content.
///
/// `BqDispatch` fires when the list item opens an inner BLOCK_QUOTE on the same
/// line (`- > <content>`) and the post-`> ` content is non-empty and not itself
/// a list marker. The caller is responsible for dispatching `content` through
/// the block parser (typically `Parser::parse_inner_content`) so block-level
/// constructs like HTML blocks or headings are recognized rather than wrapped
/// in a stray paragraph.
pub(in crate::parser) enum ListItemFinish {
    /// Carries no payload: nothing is handed back for the caller to dispatch.
    Done,
    /// The item opened a block quote on its first line; `content` is the text
    /// that followed the `> ` prefix and still has to be block-parsed by the
    /// caller.
    BqDispatch { content: String },
}
24
/// The kind of marker that introduces a list item.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ListMarker {
    /// Bullet item; the payload is the literal marker character, one of
    /// `*`, `+` or `-`.
    Bullet(char),
    /// Ordered item; the payload records the numbering scheme and delimiter.
    Ordered(OrderedMarker),
}
30
/// Numbering scheme of an ordered list marker, together with the source text
/// of the number/letter/numeral and the delimiter style where applicable.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum OrderedMarker {
    /// Decimal marker such as `1.`, `1)` or `(1)`; `number` holds the ASCII
    /// digits exactly as written.
    Decimal {
        number: String,
        style: ListDelimiter,
    },
    /// The `#.` auto-numbered marker.
    Hash,
    /// Single lowercase ASCII letter such as `a.`, `a)` or `(a)`.
    LowerAlpha {
        letter: char,
        style: ListDelimiter,
    },
    /// Single uppercase ASCII letter such as `A.`, `A)` or `(A)`.
    UpperAlpha {
        letter: char,
        style: ListDelimiter,
    },
    /// Lowercase Roman numeral such as `ii.`; `numeral` is the source text.
    LowerRoman {
        numeral: String,
        style: ListDelimiter,
    },
    /// Uppercase Roman numeral such as `II.`; `numeral` is the source text.
    UpperRoman {
        numeral: String,
        style: ListDelimiter,
    },
    /// Example-list marker `(@)` or `(@label)`; `label` is `None` for the
    /// bare `(@)` form.
    Example {
        label: Option<String>,
    },
}
58
/// Punctuation that delimits an ordered list marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosing parentheses, as in `(1)`.
    Parens,
}
65
/// Context hint for marker detection: the kind of open alphabetic list (if
/// any) at the candidate line's indent column. Used to disambiguate
/// single-letter Roman candidates {i,v,x,I,V,X} from their letter
/// interpretation in Pandoc-dialect input. Pandoc parses `a. … h. … i. … j.`
/// as a single LowerAlpha list (the `i.` after the blank line continues as
/// the letter `i`, not as Roman numeral 1). Marker detection needs this
/// signal to make that classification in a single pass.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum OpenListHint {
    /// No alphabetic list is open; single-letter Roman candidates stay Roman.
    /// This is the `Default` value.
    #[default]
    None,
    /// A lowercase alphabetic list is open, so `i`, `v`, `x` read as letters.
    LowerAlpha,
    /// An uppercase alphabetic list is open, so `I`, `V`, `X` read as letters.
    UpperAlpha,
}
80
/// Result of a successful list-marker detection on one line.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ListMarkerMatch {
    /// The recognized marker kind and its payload.
    pub(crate) marker: ListMarker,
    /// Length of the marker text itself, including any delimiter or
    /// enclosing parentheses but excluding indentation and trailing
    /// whitespace (e.g. 1 for `-`, 3 for `(a)`).
    pub(crate) marker_len: usize,
    /// Column width of the post-marker whitespace attributed to the marker,
    /// as computed by `marker_spaces_after`.
    pub(crate) spaces_after_cols: usize,
    /// Number of whitespace bytes attributed to the marker, as computed by
    /// `marker_spaces_after`.
    pub(crate) spaces_after_bytes: usize,
    /// True when CommonMark's "≥ 5 cols of post-marker whitespace → marker + 1
    /// virtual space; rest belongs to content" rule fired during marker
    /// detection. The marker's required 1 col of trailing space was virtually
    /// absorbed (typically from a tab) rather than consumed as a literal byte;
    /// the surplus whitespace is left in the post-marker text so block-level
    /// detection can recognize it as an indented code block.
    pub(crate) virtual_marker_space: bool,
}
95
/// Borrowed bundle of everything `emit_list_item` needs to emit one item's
/// indentation, marker and post-marker whitespace.
#[derive(Debug, Clone, Copy)]
pub(in crate::parser) struct ListItemEmissionInput<'a> {
    /// Source text of the item's first line; the byte offsets below index
    /// into this string.
    pub content: &'a str,
    /// Byte length of the marker text that follows the indentation.
    pub marker_len: usize,
    /// Column width of the whitespace after the marker.
    pub spaces_after_cols: usize,
    /// Byte length of the whitespace after the marker; zero means no
    /// whitespace token is emitted for it.
    pub spaces_after_bytes: usize,
    /// Column width of the leading indentation.
    // NOTE(review): presumably used for the content-column computation; the
    // consuming code is outside the visible part of this file.
    pub indent_cols: usize,
    /// Byte length of the leading indentation at the start of `content`.
    pub indent_bytes: usize,
    /// Mirrors `ListMarkerMatch::virtual_marker_space` for this item.
    pub virtual_marker_space: bool,
}
106
/// Recognize a Roman numeral prefix of `text` in the requested letter case.
///
/// Returns the byte length of the leading run of Roman letters when that run
/// passes the checks below, and `None` otherwise. Only the case selected by
/// `uppercase` is considered; letters of the other case end the run.
///
/// The scan is byte-level and allocation-free because marker detection calls
/// it for every line; callers copy the numeral out of the input only after
/// the trailing delimiter has been confirmed too.
///
/// Checks applied to the run:
/// * it is non-empty;
/// * a one-letter run must be `I`, `V` or `X` (either case), leaving the other
///   letters to alphabetic list markers;
/// * `I`, `X`, `C` repeat at most three times in a row and `V`, `L`, `D` never
///   repeat;
/// * a smaller letter directly before a larger one must form one of the
///   subtractive pairs `IV`, `IX`, `XL`, `XC`, `CD`, `CM`.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    /// Value of an upper-cased Roman letter; 0 for anything else.
    fn value_of(folded: u8) -> u32 {
        match folded {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    let alphabet: &[u8] = if uppercase { b"IVXLCDM" } else { b"ivxlcdm" };
    let len = text.bytes().take_while(|b| alphabet.contains(b)).count();
    let numeral = &text.as_bytes()[..len];

    match numeral {
        [] => return None,
        // Lone letters other than I/V/X are left to the alpha-list branch.
        [only] if !matches!(only & !0x20, b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // Every byte in `numeral` has the same case, so raw equality groups the
    // same runs a case-folded comparison would.
    let overlong_run = numeral.chunk_by(|a, b| a == b).any(|run| {
        let limit = match run[0] & !0x20 {
            b'I' | b'X' | b'C' => 3,
            b'V' | b'L' | b'D' => 1,
            _ => usize::MAX,
        };
        run.len() > limit
    });
    if overlong_run {
        return None;
    }

    // Any ascending neighbour pair must be a legal subtractive pair.
    let illegal_ascent = numeral.windows(2).any(|pair| {
        let (small, large) = (pair[0] & !0x20, pair[1] & !0x20);
        value_of(small) < value_of(large)
            && !matches!(
                (small, large),
                (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
            )
    });
    if illegal_ascent {
        return None;
    }

    Some(len)
}
197
198/// Compute (spaces_after_cols, spaces_after_bytes, virtual_marker_space) for a
199/// post-marker string starting at column `marker_end_col` of the source line.
200///
201/// Implements CommonMark §5.2 rule #2: when the effective column-width of the
202/// post-marker whitespace (counted with tabs expanding from `marker_end_col`)
203/// is ≥ 5 and there is non-empty content after it, the list item's content
204/// column is `marker_end_col + 1` (the marker plus exactly one — possibly
205/// virtual — space). The surplus whitespace is left in the post-marker text
206/// so block-level dispatch can recognize it as an indented code block.
207///
208/// In the rule case, when the first byte is a tab whose source-column span
209/// exceeds 1, no bytes are consumed (the tab stays in content) and
210/// `virtual_marker_space` is true. Otherwise the byte count describes the
211/// literal whitespace consumed as marker space.
212fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
213    let (effective_cols, n_bytes) = leading_indent_from(after_marker, marker_end_col);
214    let after_ws = &after_marker[n_bytes..];
215    let has_content = !trim_end_newlines(after_ws).is_empty();
216    if has_content && effective_cols >= 5 {
217        let bytes = match after_marker.as_bytes().first() {
218            Some(b' ') => 1,
219            Some(b'\t') => {
220                let span = 4 - (marker_end_col % 4);
221                if span == 1 { 1 } else { 0 }
222            }
223            _ => 0,
224        };
225        (1, bytes, bytes == 0)
226    } else {
227        (effective_cols, n_bytes, false)
228    }
229}
230
231/// Pandoc-dialect single-pass disambiguation: when a single-letter Roman
232/// candidate `{i,v,x}` / `{I,V,X}` would shadow an open same-case alpha
233/// list, reject the Roman classification so detection falls through to the
234/// alpha branch. `numeral_bytes` is the buffer the Roman parser just
235/// validated; `len` is its byte-length. The check fires only for `len == 1`
236/// (multi-character romans like `ii.` are unambiguously Roman) and only in
237/// Pandoc dialect.
238fn single_char_roman_shadowed_by_alpha(
239    numeral_bytes: &[u8],
240    len: usize,
241    uppercase: bool,
242    hint: OpenListHint,
243    dialect: crate::Dialect,
244) -> bool {
245    if dialect != crate::Dialect::Pandoc || len != 1 {
246        return false;
247    }
248    match (uppercase, hint) {
249        (false, OpenListHint::LowerAlpha) => {
250            matches!(numeral_bytes[0], b'i' | b'v' | b'x')
251        }
252        (true, OpenListHint::UpperAlpha) => {
253            matches!(numeral_bytes[0], b'I' | b'V' | b'X')
254        }
255        _ => false,
256    }
257}
258
259pub(crate) fn try_parse_list_marker(
260    line: &str,
261    config: &ParserOptions,
262    open_alpha_hint: OpenListHint,
263) -> Option<ListMarkerMatch> {
264    // Trailing newlines should not block bare-marker detection; the line `*\n`
265    // is a bare bullet marker and the post-marker text is logically empty.
266    let line = trim_end_newlines(line);
267    let (_indent_cols, indent_bytes) = leading_indent(line);
268    let trimmed = &line[indent_bytes..];
269
270    // Try bullet markers (including task lists)
271    if let Some(ch) = trimmed.chars().next()
272        && matches!(ch, '*' | '+' | '-')
273    {
274        let after_marker = &trimmed[1..];
275
276        // Check for task list: [ ] or [x] or [X]
277        let trimmed_after = after_marker.trim_start();
278        let is_task = trimmed_after.starts_with('[')
279            && trimmed_after.len() >= 3
280            && matches!(
281                trimmed_after.chars().nth(1),
282                Some(' ') | Some('x') | Some('X')
283            )
284            && trimmed_after.chars().nth(2) == Some(']');
285
286        // Must be followed by whitespace (or be task list)
287        if after_marker.starts_with(' ')
288            || after_marker.starts_with('\t')
289            || after_marker.is_empty()
290            || is_task
291        {
292            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
293                marker_spaces_after(after_marker, _indent_cols + 1);
294            return Some(ListMarkerMatch {
295                marker: ListMarker::Bullet(ch),
296                marker_len: 1,
297                spaces_after_cols,
298                spaces_after_bytes,
299                virtual_marker_space,
300            });
301        }
302    }
303
304    // Try ordered markers
305    if config.extensions.fancy_lists
306        && let Some(after_marker) = trimmed.strip_prefix("#.")
307        && (after_marker.starts_with(' ')
308            || after_marker.starts_with('\t')
309            || after_marker.is_empty())
310    {
311        let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
312            marker_spaces_after(after_marker, _indent_cols + 2);
313        return Some(ListMarkerMatch {
314            marker: ListMarker::Ordered(OrderedMarker::Hash),
315            marker_len: 2,
316            spaces_after_cols,
317            spaces_after_bytes,
318            virtual_marker_space,
319        });
320    }
321
322    // Try example lists: (@) or (@label)
323    if config.extensions.example_lists
324        && let Some(rest) = trimmed.strip_prefix("(@")
325    {
326        // Check if it has a label or is just (@)
327        let label_end = rest
328            .chars()
329            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
330            .count();
331
332        // Must be followed by ')'
333        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
334            let label = if label_end > 0 {
335                Some(rest[..label_end].to_string())
336            } else {
337                None
338            };
339
340            let after_marker = &rest[label_end + 1..];
341            if after_marker.starts_with(' ')
342                || after_marker.starts_with('\t')
343                || after_marker.is_empty()
344            {
345                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
346                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
347                    marker_spaces_after(after_marker, _indent_cols + marker_len);
348                return Some(ListMarkerMatch {
349                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
350                    marker_len,
351                    spaces_after_cols,
352                    spaces_after_bytes,
353                    virtual_marker_space,
354                });
355            }
356        }
357    }
358
359    // Try parenthesized markers: (2), (a), (ii)
360    if let Some(rest) = trimmed.strip_prefix('(') {
361        if config.extensions.fancy_lists {
362            // Try decimal: (2)
363            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
364            if digit_count > 0
365                && rest.len() > digit_count
366                && rest.chars().nth(digit_count) == Some(')')
367            {
368                let number = &rest[..digit_count];
369                let after_marker = &rest[digit_count + 1..];
370                if after_marker.starts_with(' ')
371                    || after_marker.starts_with('\t')
372                    || after_marker.is_empty()
373                {
374                    let marker_len = 2 + digit_count;
375                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
376                        marker_spaces_after(after_marker, _indent_cols + marker_len);
377                    return Some(ListMarkerMatch {
378                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
379                            number: number.to_string(),
380                            style: ListDelimiter::Parens,
381                        }),
382                        marker_len,
383                        spaces_after_cols,
384                        spaces_after_bytes,
385                        virtual_marker_space,
386                    });
387                }
388            }
389        }
390
391        // Try fancy lists if enabled (parenthesized markers)
392        if config.extensions.fancy_lists {
393            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
394
395            // Try lowercase Roman: (ii)
396            if let Some(len) = try_parse_roman_numeral(rest, false)
397                && rest.len() > len
398                && rest.as_bytes()[len] == b')'
399                && !single_char_roman_shadowed_by_alpha(
400                    rest.as_bytes(),
401                    len,
402                    false,
403                    open_alpha_hint,
404                    config.dialect,
405                )
406            {
407                let after_marker = &rest[len + 1..];
408                if after_marker.starts_with(' ')
409                    || after_marker.starts_with('\t')
410                    || after_marker.is_empty()
411                {
412                    let marker_len = len + 2;
413                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
414                        marker_spaces_after(after_marker, _indent_cols + marker_len);
415                    return Some(ListMarkerMatch {
416                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
417                            numeral: rest[..len].to_string(),
418                            style: ListDelimiter::Parens,
419                        }),
420                        marker_len,
421                        spaces_after_cols,
422                        spaces_after_bytes,
423                        virtual_marker_space,
424                    });
425                }
426            }
427
428            // Try uppercase Roman: (II)
429            if let Some(len) = try_parse_roman_numeral(rest, true)
430                && rest.len() > len
431                && rest.as_bytes()[len] == b')'
432                && !single_char_roman_shadowed_by_alpha(
433                    rest.as_bytes(),
434                    len,
435                    true,
436                    open_alpha_hint,
437                    config.dialect,
438                )
439            {
440                let after_marker = &rest[len + 1..];
441                if after_marker.starts_with(' ')
442                    || after_marker.starts_with('\t')
443                    || after_marker.is_empty()
444                {
445                    let marker_len = len + 2;
446                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
447                        marker_spaces_after(after_marker, _indent_cols + marker_len);
448                    return Some(ListMarkerMatch {
449                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
450                            numeral: rest[..len].to_string(),
451                            style: ListDelimiter::Parens,
452                        }),
453                        marker_len,
454                        spaces_after_cols,
455                        spaces_after_bytes,
456                        virtual_marker_space,
457                    });
458                }
459            }
460
461            // Try lowercase letter: (a)
462            if let Some(ch) = rest.chars().next()
463                && ch.is_ascii_lowercase()
464                && rest.len() > 1
465                && rest.chars().nth(1) == Some(')')
466            {
467                let after_marker = &rest[2..];
468                if after_marker.starts_with(' ')
469                    || after_marker.starts_with('\t')
470                    || after_marker.is_empty()
471                {
472                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
473                        marker_spaces_after(after_marker, _indent_cols + 3);
474                    return Some(ListMarkerMatch {
475                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
476                            letter: ch,
477                            style: ListDelimiter::Parens,
478                        }),
479                        marker_len: 3,
480                        spaces_after_cols,
481                        spaces_after_bytes,
482                        virtual_marker_space,
483                    });
484                }
485            }
486
487            // Try uppercase letter: (A)
488            if let Some(ch) = rest.chars().next()
489                && ch.is_ascii_uppercase()
490                && rest.len() > 1
491                && rest.chars().nth(1) == Some(')')
492            {
493                let after_marker = &rest[2..];
494                if after_marker.starts_with(' ')
495                    || after_marker.starts_with('\t')
496                    || after_marker.is_empty()
497                {
498                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
499                        marker_spaces_after(after_marker, _indent_cols + 3);
500                    return Some(ListMarkerMatch {
501                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
502                            letter: ch,
503                            style: ListDelimiter::Parens,
504                        }),
505                        marker_len: 3,
506                        spaces_after_cols,
507                        spaces_after_bytes,
508                        virtual_marker_space,
509                    });
510                }
511            }
512        }
513    }
514
515    // Try decimal numbers: 1. or 1)
516    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
517    if digit_count > 0 && trimmed.len() > digit_count {
518        // CommonMark restricts ordered list markers to 1-9 digits (spec §5.2).
519        // Pandoc-markdown accepts arbitrary digit counts.
520        if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
521            return None;
522        }
523
524        let number = &trimmed[..digit_count];
525        let delim = trimmed.chars().nth(digit_count);
526
527        let (style, marker_len) = match delim {
528            Some('.') => (ListDelimiter::Period, digit_count + 1),
529            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
530            _ => return None,
531        };
532        // CommonMark §5.2: decimal `1)` markers are part of the core grammar.
533        // Pandoc-markdown gates `)`-style ordered markers behind `fancy_lists`.
534        if style == ListDelimiter::RightParen
535            && !config.extensions.fancy_lists
536            && config.dialect != crate::Dialect::CommonMark
537        {
538            return None;
539        }
540
541        let after_marker = &trimmed[marker_len..];
542        if after_marker.starts_with(' ')
543            || after_marker.starts_with('\t')
544            || after_marker.is_empty()
545        {
546            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
547                marker_spaces_after(after_marker, _indent_cols + marker_len);
548            return Some(ListMarkerMatch {
549                marker: ListMarker::Ordered(OrderedMarker::Decimal {
550                    number: number.to_string(),
551                    style,
552                }),
553                marker_len,
554                spaces_after_cols,
555                spaces_after_bytes,
556                virtual_marker_space,
557            });
558        }
559    }
560
561    // Try fancy lists if enabled (non-parenthesized)
562    if config.extensions.fancy_lists {
563        // Try Roman numerals first, as they may overlap with letters
564
565        // Try lowercase Roman: i. or ii)
566        if let Some(len) = try_parse_roman_numeral(trimmed, false)
567            && trimmed.len() > len
568            && let delim = trimmed.as_bytes()[len]
569            && (delim == b'.' || delim == b')')
570            && !single_char_roman_shadowed_by_alpha(
571                trimmed.as_bytes(),
572                len,
573                false,
574                open_alpha_hint,
575                config.dialect,
576            )
577        {
578            let style = if delim == b'.' {
579                ListDelimiter::Period
580            } else {
581                ListDelimiter::RightParen
582            };
583            let marker_len = len + 1;
584
585            let after_marker = &trimmed[marker_len..];
586            if after_marker.starts_with(' ')
587                || after_marker.starts_with('\t')
588                || after_marker.is_empty()
589            {
590                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
591                    marker_spaces_after(after_marker, _indent_cols + marker_len);
592                return Some(ListMarkerMatch {
593                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
594                        numeral: trimmed[..len].to_string(),
595                        style,
596                    }),
597                    marker_len,
598                    spaces_after_cols,
599                    spaces_after_bytes,
600                    virtual_marker_space,
601                });
602            }
603        }
604
605        // Try uppercase Roman: I. or II)
606        if let Some(len) = try_parse_roman_numeral(trimmed, true)
607            && trimmed.len() > len
608            && let delim = trimmed.as_bytes()[len]
609            && (delim == b'.' || delim == b')')
610            && !single_char_roman_shadowed_by_alpha(
611                trimmed.as_bytes(),
612                len,
613                true,
614                open_alpha_hint,
615                config.dialect,
616            )
617        {
618            let style = if delim == b'.' {
619                ListDelimiter::Period
620            } else {
621                ListDelimiter::RightParen
622            };
623            let marker_len = len + 1;
624
625            let after_marker = &trimmed[marker_len..];
626            // Pandoc: single-character uppercase Roman (I, V, X, L, C, D, M)
627            // followed by `.` requires two spaces, to avoid confusion with
628            // initials like "I. M. Pei". Multi-character romans (II., XII.,
629            // …) and the right-paren form (I)) only need one space. See
630            // pandoc/src/Text/Pandoc/Readers/Markdown.hs `orderedListStart`.
631            let min_spaces = if delim == b'.' && len == 1 { 2 } else { 1 };
632            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
633
634            if (after_marker.starts_with(' ')
635                || after_marker.starts_with('\t')
636                || after_marker.is_empty())
637                && (after_marker.is_empty() || effective_cols >= min_spaces)
638            {
639                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
640                    marker_spaces_after(after_marker, _indent_cols + marker_len);
641                return Some(ListMarkerMatch {
642                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
643                        numeral: trimmed[..len].to_string(),
644                        style,
645                    }),
646                    marker_len,
647                    spaces_after_cols,
648                    spaces_after_bytes,
649                    virtual_marker_space,
650                });
651            }
652        }
653
654        // Try lowercase letter: a. or a)
655        if let Some(ch) = trimmed.chars().next()
656            && ch.is_ascii_lowercase()
657            && trimmed.len() > 1
658            && let Some(delim) = trimmed.chars().nth(1)
659            && (delim == '.' || delim == ')')
660        {
661            let style = if delim == '.' {
662                ListDelimiter::Period
663            } else {
664                ListDelimiter::RightParen
665            };
666            let marker_len = 2;
667
668            let after_marker = &trimmed[marker_len..];
669            if after_marker.starts_with(' ')
670                || after_marker.starts_with('\t')
671                || after_marker.is_empty()
672            {
673                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
674                    marker_spaces_after(after_marker, _indent_cols + marker_len);
675                return Some(ListMarkerMatch {
676                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
677                    marker_len,
678                    spaces_after_cols,
679                    spaces_after_bytes,
680                    virtual_marker_space,
681                });
682            }
683        }
684
685        // Try uppercase letter: A. or A)
686        if let Some(ch) = trimmed.chars().next()
687            && ch.is_ascii_uppercase()
688            && trimmed.len() > 1
689            && let Some(delim) = trimmed.chars().nth(1)
690            && (delim == '.' || delim == ')')
691        {
692            let style = if delim == '.' {
693                ListDelimiter::Period
694            } else {
695                ListDelimiter::RightParen
696            };
697            let marker_len = 2;
698
699            let after_marker = &trimmed[marker_len..];
700            // Special rule: uppercase letter with period needs 2 spaces minimum
701            let min_spaces = if delim == '.' { 2 } else { 1 };
702            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
703
704            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
705                && effective_cols >= min_spaces
706            {
707                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
708                    marker_spaces_after(after_marker, _indent_cols + marker_len);
709                return Some(ListMarkerMatch {
710                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
711                    marker_len,
712                    spaces_after_cols,
713                    spaces_after_bytes,
714                    virtual_marker_space,
715                });
716            }
717        }
718    }
719
720    None
721}
722
723pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
724    match (a, b) {
725        // CommonMark §5.3: bullet list markers `-`, `+`, `*` are *distinct*
726        // bullet types — switching from one to another starts a new list.
727        // Pandoc-markdown treats them as interchangeable: any bullet
728        // continues an open bullet list. Verified with pandoc against
729        // `- foo\n- bar\n+ baz\n` (#301).
730        (ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
731            crate::Dialect::CommonMark => ca == cb,
732            _ => true,
733        },
734        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
735            true
736        }
737        (
738            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
739            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
740        ) => s1 == s2,
741        (
742            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
743            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
744        ) => s1 == s2,
745        (
746            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
747            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
748        ) => s1 == s2,
749        (
750            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
751            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
752        ) => s1 == s2,
753        (
754            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
755            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
756        ) => s1 == s2,
757        (
758            ListMarker::Ordered(OrderedMarker::Example { .. }),
759            ListMarker::Ordered(OrderedMarker::Example { .. }),
760        ) => true, // All example list items match each other
761        _ => false,
762    }
763}
764
765/// One tab stop: the indentation (in columns) required for list continuation
766/// paragraphs and nested lists under the `four_space_rule` extension
767/// (pandoc <= 2.0 list semantics).
768const FOUR_SPACE_RULE_COLS: usize = 4;
769
770/// Column at which a list item's content logically begins. This is the
771/// threshold used downstream for continuation/nesting classification and for
772/// stripping the leading indent off continuation lines.
773///
774/// By default it lines up with the first non-space character after the marker
775/// (CommonMark / pandoc default). Under the `four_space_rule` extension it is a
776/// flat one-tab-width per nesting level, independent of marker width — so a
777/// `100.` marker still requires four-space continuation, not six.
778pub(in crate::parser) fn list_item_content_col(
779    indent_cols: usize,
780    marker_len: usize,
781    spaces_after_cols: usize,
782    config: &ParserOptions,
783) -> usize {
784    if config.extensions.four_space_rule {
785        indent_cols + FOUR_SPACE_RULE_COLS
786    } else {
787        indent_cols + marker_len + spaces_after_cols
788    }
789}
790
791/// Emit a list item node to the builder (marker and whitespace only).
792/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
793/// added to the list item buffer for later inline parsing.
794pub(in crate::parser) fn emit_list_item(
795    builder: &mut GreenNodeBuilder<'static>,
796    item: &ListItemEmissionInput<'_>,
797    config: &ParserOptions,
798) -> (usize, String) {
799    builder.start_node(SyntaxKind::LIST_ITEM.into());
800
801    // Emit leading indentation for lossless parsing
802    if item.indent_bytes > 0 {
803        builder.token(
804            SyntaxKind::WHITESPACE.into(),
805            &item.content[..item.indent_bytes],
806        );
807    }
808
809    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
810    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
811
812    if item.spaces_after_bytes > 0 {
813        let space_start = item.indent_bytes + item.marker_len;
814        let space_end = space_start + item.spaces_after_bytes;
815        if space_end <= item.content.len() {
816            builder.token(
817                SyntaxKind::WHITESPACE.into(),
818                &item.content[space_start..space_end],
819            );
820        }
821    }
822
823    let content_col = list_item_content_col(
824        item.indent_cols,
825        item.marker_len,
826        item.spaces_after_cols,
827        config,
828    );
829    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
830
831    // Extract text content to be buffered (instead of emitting it directly).
832    // If the item starts with a task checkbox, emit it as a dedicated token so it
833    // doesn't get parsed as a link.
834    let text_to_buffer = if content_start < item.content.len() {
835        let rest = &item.content[content_start..];
836        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
837            && rest
838                .as_bytes()
839                .get(3)
840                .is_some_and(|b| (*b as char).is_whitespace())
841        {
842            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
843            rest[3..].to_string()
844        } else {
845            rest.to_string()
846        }
847    } else {
848        String::new()
849    };
850
851    (content_col, text_to_buffer)
852}
853
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    /// Build a container stack by pushing `containers` in order (outermost first).
    fn stack_of(containers: impl IntoIterator<Item = Container>) -> ContainerStack {
        let mut stack = ContainerStack::new();
        for container in containers {
            stack.stack.push(container);
        }
        stack
    }

    /// An open ordered list at base indent 0 with no blank lines between items.
    fn ordered_list_at_col_zero(marker: OrderedMarker) -> Container {
        Container::List {
            marker: ListMarker::Ordered(marker),
            base_indent_cols: 0,
            has_blank_between_items: false,
        }
    }

    /// The `a.` marker used as the "open alpha list" fixture.
    fn lower_alpha_a_period() -> OrderedMarker {
        OrderedMarker::LowerAlpha {
            letter: 'a',
            style: ListDelimiter::Period,
        }
    }

    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        // A bullet may be followed by either a space or a tab.
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config, OpenListHint::None).is_some());
        }
    }

    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        // Lowercase alpha with a period, then with a right paren.
        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(&line, &config, OpenListHint::None).is_some(),
                "{marker} should parse"
            );
        }
    }

    #[test]
    fn single_letter_i_classified_as_alpha_with_lower_alpha_hint() {
        let config = ParserOptions::default(); // Pandoc + fancy_lists
        let parsed = try_parse_list_marker("i. foo", &config, OpenListHint::LowerAlpha).unwrap();
        let is_lower_alpha_i = matches!(
            parsed.marker,
            ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: 'i', .. })
        );
        assert!(
            is_lower_alpha_i,
            "i. should classify as LowerAlpha when a LowerAlpha list is open: got {:?}",
            parsed.marker
        );
    }

    #[test]
    fn single_letter_i_classified_as_roman_with_no_hint() {
        let config = ParserOptions::default();
        let parsed = try_parse_list_marker("i. foo", &config, OpenListHint::None).unwrap();
        let is_lower_roman = matches!(
            parsed.marker,
            ListMarker::Ordered(OrderedMarker::LowerRoman { .. })
        );
        assert!(
            is_lower_roman,
            "i. should classify as LowerRoman with no hint: got {:?}",
            parsed.marker
        );
    }

    #[test]
    fn multichar_roman_ignores_hint() {
        let config = ParserOptions::default();
        let parsed = try_parse_list_marker("ii. foo", &config, OpenListHint::LowerAlpha).unwrap();
        let is_lower_roman = matches!(
            parsed.marker,
            ListMarker::Ordered(OrderedMarker::LowerRoman { .. })
        );
        assert!(
            is_lower_roman,
            "ii. must stay LowerRoman regardless of hint: got {:?}",
            parsed.marker
        );
    }

    #[test]
    fn hint_ignored_in_commonmark_dialect() {
        // With fancy_lists off under CommonMark, `i.` is not an ordered marker
        // at all; an open-alpha hint must not change that.
        let extensions = crate::options::Extensions {
            fancy_lists: false,
            ..Default::default()
        };
        let config = ParserOptions {
            dialect: crate::Dialect::CommonMark,
            extensions,
            ..Default::default()
        };
        assert!(
            try_parse_list_marker("i. foo", &config, OpenListHint::LowerAlpha).is_none(),
            "i. should not parse as a list marker under CommonMark"
        );
    }

    #[test]
    fn uppercase_i_classified_as_alpha_with_upper_alpha_hint() {
        let config = ParserOptions::default();
        // Uppercase + period requires 2 spaces (the I.M.Pei rule).
        let parsed = try_parse_list_marker("I.  foo", &config, OpenListHint::UpperAlpha).unwrap();
        let is_upper_alpha_i = matches!(
            parsed.marker,
            ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: 'I', .. })
        );
        assert!(
            is_upper_alpha_i,
            "I. should classify as UpperAlpha when an UpperAlpha list is open: got {:?}",
            parsed.marker
        );
    }

    #[test]
    fn lowercase_hint_does_not_shadow_uppercase_candidate() {
        let config = ParserOptions::default();
        let parsed = try_parse_list_marker("I.  foo", &config, OpenListHint::LowerAlpha).unwrap();
        let is_upper_roman = matches!(
            parsed.marker,
            ListMarker::Ordered(OrderedMarker::UpperRoman { .. })
        );
        assert!(
            is_upper_roman,
            "I. + LowerAlpha hint must stay UpperRoman (case mismatch): got {:?}",
            parsed.marker
        );
    }

    #[test]
    fn parenthesized_single_letter_i_obeys_hint() {
        let config = ParserOptions::default();
        let parsed = try_parse_list_marker("(i) foo", &config, OpenListHint::LowerAlpha).unwrap();
        let is_lower_alpha_i = matches!(
            parsed.marker,
            ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: 'i', .. })
        );
        assert!(
            is_lower_alpha_i,
            "(i) should classify as LowerAlpha when a LowerAlpha list is open: got {:?}",
            parsed.marker
        );
    }

    #[test]
    fn open_list_hint_at_indent_lower_alpha_at_same_indent() {
        let stack = stack_of([ordered_list_at_col_zero(lower_alpha_a_period())]);
        assert_eq!(
            open_list_hint_at_indent(&stack, 0),
            OpenListHint::LowerAlpha
        );
    }

    #[test]
    fn open_list_hint_at_indent_returns_none_when_indent_differs() {
        // Protects nested-roman-inside-alpha: an `i.` at indent 3 must NOT
        // be reclassified against the outer alpha at indent 0.
        let stack = stack_of([ordered_list_at_col_zero(lower_alpha_a_period())]);
        assert_eq!(open_list_hint_at_indent(&stack, 3), OpenListHint::None);
    }

    #[test]
    fn open_list_hint_at_indent_returns_none_for_decimal_or_roman() {
        let non_alpha_markers = [
            OrderedMarker::Decimal {
                number: "1".to_string(),
                style: ListDelimiter::Period,
            },
            OrderedMarker::LowerRoman {
                numeral: "i".to_string(),
                style: ListDelimiter::Period,
            },
        ];

        // Each marker gets its own fresh stack with a single open list.
        for marker in non_alpha_markers {
            let stack = stack_of([ordered_list_at_col_zero(marker)]);
            assert_eq!(open_list_hint_at_indent(&stack, 0), OpenListHint::None);
        }
    }

    #[test]
    fn open_list_hint_at_indent_stops_at_blockquote_barrier() {
        let stack = stack_of([
            ordered_list_at_col_zero(lower_alpha_a_period()),
            Container::BlockQuote {},
        ]);
        // Inside the blockquote at indent 0: the outer alpha must not leak in.
        assert_eq!(open_list_hint_at_indent(&stack, 0), OpenListHint::None);
    }
}
1078
/// Fancy-list markers belong to the same list when kind and delimiter agree,
/// regardless of the actual letter or numeral.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::{Period, RightParen};
    use ListMarker::Ordered;
    use OrderedMarker::{LowerAlpha, LowerRoman};

    let lower_alpha = |letter, style| Ordered(LowerAlpha { letter, style });
    let lower_roman_period = |numeral: &str| {
        Ordered(LowerRoman {
            numeral: numeral.to_string(),
            style: Period,
        })
    };
    let pandoc = crate::Dialect::Pandoc;

    // Same kind and delimiter: match.
    let a_period = lower_alpha('a', Period);
    assert!(
        markers_match(&a_period, &lower_alpha('b', Period), pandoc),
        "a. and b. should match"
    );
    assert!(
        markers_match(&lower_roman_period("i"), &lower_roman_period("ii"), pandoc),
        "i. and ii. should match"
    );

    // Same kind, different delimiter: no match.
    assert!(
        !markers_match(&a_period, &lower_alpha('a', RightParen), pandoc),
        "a. and a) should not match"
    );
}
1122
/// Bullet matching depends on the dialect: Pandoc treats any two bullets as
/// the same list, CommonMark only when the bullet character is identical.
#[test]
fn markers_match_bullet_dialect_split() {
    use crate::Dialect::{CommonMark, Pandoc};
    use ListMarker::Bullet;

    // (first bullet, second bullet, dialect, expected `markers_match` result)
    let cases = [
        // Pandoc: any bullet matches any bullet (same list).
        ('-', '+', Pandoc, true),
        // CommonMark: bullets match only when the marker character is the same.
        ('-', '-', CommonMark, true),
        ('-', '+', CommonMark, false),
        ('*', '-', CommonMark, false),
    ];

    for (first, second, dialect, expected) in cases {
        assert_eq!(
            markers_match(&Bullet(first), &Bullet(second), dialect),
            expected
        );
    }
}
1149
/// Lowercase roman numerals four through ten are recognized as list markers
/// when `fancy_lists` is enabled.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    for numeral in ["iv", "v", "vi", "vii", "viii", "ix", "x"] {
        let line = format!("{numeral}. item");
        assert!(
            try_parse_list_marker(&line, &config, OpenListHint::None).is_some(),
            "{numeral}. should parse"
        );
    }
}
1185
/// Example-list markers (`(@)`, `(@label)`) parse only while the
/// `example_lists` extension is enabled.
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    // One unlabeled marker followed by labeled ones.
    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config, OpenListHint::None).is_some(),
            "{marker} should parse"
        );
    }

    // Same input with the extension switched off.
    let extensions = crate::options::Extensions {
        example_lists: false,
        ..Default::default()
    };
    let disabled_config = ParserOptions {
        extensions,
        ..Default::default()
    };
    assert!(
        try_parse_list_marker("(@) item", &disabled_config, OpenListHint::None).is_none(),
        "(@) should not parse when extension disabled"
    );
}
1224
/// A deep ordered item whose indent drifted between two open lists must stay
/// with the enclosing list (larger base indent), not the lower sibling level.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let roman_list_at = |base_indent_cols| Container::List {
        marker: lower_roman("ii"),
        base_indent_cols,
        has_blank_between_items: false,
    };

    // Stack, outermost first: list @8, one of its items, list @6.
    let mut containers = ContainerStack::new();
    containers.push(roman_list_at(8));
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(roman_list_at(6));

    // With deep ordered drift (indent 7), we should keep the enclosing level
    // (base indent 8), not re-associate to the nearest lower sibling level (6).
    let level = find_matching_list_level(
        &containers,
        &lower_roman("iii"),
        7,
        crate::Dialect::Pandoc,
    );
    assert_eq!(level, Some(0));
}
1267
/// When an open list sits at exactly the item's indent, that list wins even
/// if another matching list is within drift range.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    // Two open roman lists, pushed in order: base indent 8, then base indent 6.
    let mut containers = ContainerStack::new();
    for base_indent_cols in [8, 6] {
        containers.push(Container::List {
            marker: lower_roman("ii"),
            base_indent_cols,
            has_blank_between_items: false,
        });
    }

    let level = find_matching_list_level(
        &containers,
        &lower_roman("iii"),
        6,
        crate::Dialect::Pandoc,
    );
    assert_eq!(level, Some(1));
}
1302
/// A list item whose only content is another bullet marker (`- *`, `- +`,
/// `- -`) must produce a nested list with an empty item, not plain text.
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();

    // Exercise each bullet character as the nested marker.
    for nested_bullet in ['*', '+', '-'] {
        let desc = format!("- {nested_bullet}");
        let input = format!("{desc}\n");
        let tree = parse(&input, Some(config.clone()));

        // `parse` hands back the DOCUMENT node itself.
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT -> LIST
        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node");
        };

        // LIST -> LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        // LIST_ITEM -> nested LIST (rather than PLAIN with TEXT)
        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            let child_kinds: Vec<_> = outer_item.children().map(|n| n.kind()).collect();
            panic!("{desc}: outer LIST_ITEM should contain nested LIST, got: {child_kinds:?}");
        };

        // nested LIST -> LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        // The nested item carries no PLAIN child, i.e. it is empty.
        let lacks_plain = nested_item
            .children()
            .all(|n| n.kind() != SyntaxKind::PLAIN);
        assert!(
            lacks_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
1360
1361// Helper functions for list management in Parser
1362
1363/// Check if we're in any list.
1364pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
1365    containers
1366        .stack
1367        .iter()
1368        .any(|c| matches!(c, Container::List { .. }))
1369}
1370
1371/// Check if we're in a list inside a blockquote.
1372pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
1373    let mut seen_blockquote = false;
1374    for c in &containers.stack {
1375        if matches!(c, Container::BlockQuote { .. }) {
1376            seen_blockquote = true;
1377        }
1378        if seen_blockquote && matches!(c, Container::List { .. }) {
1379            return true;
1380        }
1381    }
1382    false
1383}
1384
1385/// Return the kind of open alphabetic list at exactly `indent_cols`, if any.
1386///
1387/// Walks the container stack from deepest to shallowest, stopping at a
1388/// `Container::BlockQuote` barrier (mirrors `find_matching_list_level`'s
1389/// barrier behavior so a list outside a blockquote can't influence
1390/// classification inside one). Returns `OpenListHint::None` for any
1391/// non-alpha marker or when no list is open at the queried indent.
1392///
1393/// Used by `try_parse_list_marker` to disambiguate single-letter Roman
1394/// candidates {i,v,x,I,V,X} against an open alpha list in Pandoc dialect.
1395/// The exact-indent gate is what protects nested Roman-inside-alpha
1396/// sublists like `a.\n   i.` — there the inner `i.` lives at a deeper
1397/// indent than the outer alpha base, so this returns `None` and Roman
1398/// classification wins.
1399pub(in crate::parser) fn open_list_hint_at_indent(
1400    containers: &ContainerStack,
1401    indent_cols: usize,
1402) -> OpenListHint {
1403    for c in containers.stack.iter().rev() {
1404        if matches!(c, Container::BlockQuote { .. }) {
1405            return OpenListHint::None;
1406        }
1407        if let Container::List {
1408            marker,
1409            base_indent_cols,
1410            ..
1411        } = c
1412            && *base_indent_cols == indent_cols
1413        {
1414            return match marker {
1415                ListMarker::Ordered(OrderedMarker::LowerAlpha { .. }) => OpenListHint::LowerAlpha,
1416                ListMarker::Ordered(OrderedMarker::UpperAlpha { .. }) => OpenListHint::UpperAlpha,
1417                _ => OpenListHint::None,
1418            };
1419        }
1420    }
1421    OpenListHint::None
1422}
1423
1424/// Find matching list level for a marker with the given indent.
1425pub(in crate::parser) fn find_matching_list_level(
1426    containers: &ContainerStack,
1427    marker: &ListMarker,
1428    indent_cols: usize,
1429    dialect: crate::Dialect,
1430) -> Option<usize> {
1431    // Search from deepest (last) to shallowest (first)
1432    // But for shallow items (0-3 indent), prefer matching at the closest base indent
1433    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)
1434
1435    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
1436    let mut best_above_match: Option<(usize, usize)> = None; // (index, delta = base - indent), ordered deep only
1437
1438    for (i, c) in containers.stack.iter().enumerate().rev() {
1439        // BlockQuote acts as a list-continuation barrier. A list outside a
1440        // BlockQuote can't be continued from inside the BlockQuote — opening
1441        // a BlockQuote starts a new container "world". Without this stop,
1442        // `- intro\n\n  > - 0:` matches the outer `-` list and closes the
1443        // freshly-opened BlockQuote (issue #292). Pandoc-native treats the
1444        // inner list as a child of the BlockQuote.
1445        if matches!(c, Container::BlockQuote { .. }) {
1446            break;
1447        }
1448        if let Container::List {
1449            marker: list_marker,
1450            base_indent_cols,
1451            ..
1452        } = c
1453            && markers_match(marker, list_marker, dialect)
1454        {
1455            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
1456                // Deep indentation:
1457                // - bullets stay directional to preserve nesting boundaries
1458                // - ordered markers allow small symmetric drift to keep
1459                //   marker-width-aligned lists (i./ii./iii.) at one level
1460                match (marker, list_marker) {
1461                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1462                        indent_cols.abs_diff(*base_indent_cols) <= 3
1463                    }
1464                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
1465                }
1466            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
1467                // One shallow, one deep:
1468                // - ordered markers still allow symmetric drift so aligned roman
1469                //   markers (e.g. 3/4/5 spaces for i./ii./iii.) stay at one level
1470                // - bullets remain directional to preserve nesting boundaries
1471                match (marker, list_marker) {
1472                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
1473                        indent_cols.abs_diff(*base_indent_cols) <= 3
1474                    }
1475                    _ => false,
1476                }
1477            } else {
1478                // Both at shallow indentation (0-3)
1479                // Allow items within 3 spaces
1480                indent_cols.abs_diff(*base_indent_cols) <= 3
1481            };
1482
1483            if matches {
1484                let distance = indent_cols.abs_diff(*base_indent_cols);
1485                let base_leq_indent = *base_indent_cols <= indent_cols;
1486
1487                // For deep ordered lists, avoid "nearest below" re-association caused by
1488                // formatter alignment shifts (e.g. i./ii./iii. becoming 6/7/8-space indents).
1489                // Prefer matching the nearest enclosing level whose base indent is >= current.
1490                if is_deep_ordered
1491                    && matches!(
1492                        (marker, list_marker),
1493                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
1494                    )
1495                    && *base_indent_cols >= indent_cols
1496                {
1497                    let delta = *base_indent_cols - indent_cols;
1498                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
1499                        best_above_match = Some((i, delta));
1500                    }
1501                }
1502
1503                if let Some((_, best_dist, best_base_leq)) = best_match {
1504                    if distance < best_dist
1505                        || (distance == best_dist && base_leq_indent && !best_base_leq)
1506                    {
1507                        best_match = Some((i, distance, base_leq_indent));
1508                    }
1509                } else {
1510                    best_match = Some((i, distance, base_leq_indent));
1511                }
1512
1513                // If we found an exact match, return immediately
1514                if distance == 0 {
1515                    return Some(i);
1516                }
1517            }
1518        }
1519    }
1520
1521    if let Some((index, _)) = best_above_match {
1522        return Some(index);
1523    }
1524
1525    best_match.map(|(i, _, _)| i)
1526}
1527
1528/// Start a nested list within an existing list item.
1529pub(in crate::parser) fn start_nested_list(
1530    containers: &mut ContainerStack,
1531    builder: &mut GreenNodeBuilder<'static>,
1532    marker: &ListMarker,
1533    item: &ListItemEmissionInput<'_>,
1534    indent_to_emit: Option<&str>,
1535    config: &ParserOptions,
1536) -> ListItemFinish {
1537    // Emit the indent if needed
1538    if let Some(indent_str) = indent_to_emit {
1539        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1540    }
1541
1542    // Start nested list
1543    builder.start_node(SyntaxKind::LIST.into());
1544    containers.push(Container::List {
1545        marker: marker.clone(),
1546        base_indent_cols: item.indent_cols,
1547        has_blank_between_items: false,
1548    });
1549
1550    // Add the nested list item
1551    let (content_col, text_to_buffer) = emit_list_item(builder, item, config);
1552    finish_list_item_with_optional_nested(
1553        containers,
1554        builder,
1555        content_col,
1556        text_to_buffer,
1557        item.virtual_marker_space,
1558        config,
1559    )
1560}
1561
1562/// Checks if the content after a list marker is exactly another bullet marker.
1563/// Returns the nested bullet marker character if detected.
1564pub(in crate::parser) fn is_content_nested_bullet_marker(
1565    content: &str,
1566    marker_len: usize,
1567    spaces_after_bytes: usize,
1568) -> Option<char> {
1569    let (_, indent_bytes) = leading_indent(content);
1570    let content_start = indent_bytes + marker_len + spaces_after_bytes;
1571
1572    if content_start >= content.len() {
1573        return None;
1574    }
1575
1576    let remaining = &content[content_start..];
1577    let (text_part, _) = strip_newline(remaining);
1578    let trimmed = text_part.trim();
1579
1580    // Check if it's exactly one of the bullet marker characters
1581    if trimmed.len() == 1 {
1582        let ch = trimmed.chars().next().unwrap();
1583        if matches!(ch, '*' | '+' | '-') {
1584            return Some(ch);
1585        }
1586    }
1587
1588    None
1589}
1590
1591/// Add a list item that contains a nested empty list (for cases like `- *`).
1592/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
1593pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1594    containers: &mut ContainerStack,
1595    builder: &mut GreenNodeBuilder<'static>,
1596    item: &ListItemEmissionInput<'_>,
1597    nested_marker: char,
1598    config: &ParserOptions,
1599) {
1600    // First, emit the outer list item (just marker + whitespace)
1601    builder.start_node(SyntaxKind::LIST_ITEM.into());
1602
1603    // Emit leading indentation for lossless parsing
1604    if item.indent_bytes > 0 {
1605        builder.token(
1606            SyntaxKind::WHITESPACE.into(),
1607            &item.content[..item.indent_bytes],
1608        );
1609    }
1610
1611    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1612    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1613
1614    if item.spaces_after_bytes > 0 {
1615        let space_start = item.indent_bytes + item.marker_len;
1616        let space_end = space_start + item.spaces_after_bytes;
1617        if space_end <= item.content.len() {
1618            builder.token(
1619                SyntaxKind::WHITESPACE.into(),
1620                &item.content[space_start..space_end],
1621            );
1622        }
1623    }
1624
1625    // Now start the nested list inside this item
1626    builder.start_node(SyntaxKind::LIST.into());
1627
1628    // Add empty list item to the nested list
1629    builder.start_node(SyntaxKind::LIST_ITEM.into());
1630    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1631
1632    // Extract and emit the newline from original content (lossless)
1633    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1634    if content_start < item.content.len() {
1635        let remaining = &item.content[content_start..];
1636        // Skip the nested marker character (1 byte) and get the newline
1637        if remaining.len() > 1 {
1638            let (_, newline_str) = strip_newline(&remaining[1..]);
1639            if !newline_str.is_empty() {
1640                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1641            }
1642        }
1643    }
1644
1645    builder.finish_node(); // Close nested LIST_ITEM
1646    builder.finish_node(); // Close nested LIST
1647
1648    // Push container for the outer list item
1649    let content_col = list_item_content_col(
1650        item.indent_cols,
1651        item.marker_len,
1652        item.spaces_after_cols,
1653        config,
1654    );
1655    containers.push(Container::ListItem {
1656        content_col,
1657        buffer: ListItemBuffer::new(),
1658        marker_only: false, // The nested LIST counts as real content.
1659        virtual_marker_space: item.virtual_marker_space,
1660    });
1661}
1662
1663/// Add a list item to the current list.
1664pub(in crate::parser) fn add_list_item(
1665    containers: &mut ContainerStack,
1666    builder: &mut GreenNodeBuilder<'static>,
1667    item: &ListItemEmissionInput<'_>,
1668    config: &ParserOptions,
1669) -> ListItemFinish {
1670    let (content_col, text_to_buffer) = emit_list_item(builder, item, config);
1671
1672    log::trace!(
1673        "add_list_item: content={:?}, text_to_buffer={:?}",
1674        item.content,
1675        text_to_buffer
1676    );
1677
1678    finish_list_item_with_optional_nested(
1679        containers,
1680        builder,
1681        content_col,
1682        text_to_buffer,
1683        item.virtual_marker_space,
1684        config,
1685    )
1686}
1687
1688/// Finish a list item by either buffering its content or, when the buffered
1689/// content begins with another list marker followed by content, recursively
1690/// opening a nested LIST with another LIST_ITEM. Pushes the appropriate
1691/// containers onto the stack so the caller doesn't need to.
1692fn finish_list_item_with_optional_nested(
1693    containers: &mut ContainerStack,
1694    builder: &mut GreenNodeBuilder<'static>,
1695    content_col: usize,
1696    text_to_buffer: String,
1697    virtual_marker_space: bool,
1698    config: &ParserOptions,
1699) -> ListItemFinish {
1700    // A line whose content is a thematic break (e.g. `* * *`) takes precedence
1701    // over being parsed as a sequence of nested list markers. Both dialects
1702    // agree: `- * * *` is a list item containing a thematic break, not a
1703    // chain of bullets.
1704    let buffered_is_thematic_break =
1705        super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
1706            .is_some();
1707
1708    // Recursive same-line nested list emission applies to both dialects:
1709    // pandoc-markdown and CommonMark agree on the nested LIST_ITEM shape
1710    // for `- - foo`, `1. - 2. foo`, etc. (verified via `pandoc -f markdown
1711    // -t native` and `pandoc -f commonmark -t native`). The companion
1712    // formatter arm in `format_list_item` handles the LIST-first-child
1713    // shape so the round-trip stays idempotent.
1714
1715    if !buffered_is_thematic_break
1716        && let Some(inner_match) =
1717            try_parse_list_marker(&text_to_buffer, config, OpenListHint::None)
1718    {
1719        let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1720        let after_inner =
1721            trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
1722        // Recurse only when there is real content after the inner marker.
1723        // The bare-inner-marker case (e.g. `- *`) is handled by the existing
1724        // `add_list_item_with_nested_empty_list` path.
1725        if !after_inner.is_empty() {
1726            // Push outer ListItem with empty buffer.
1727            containers.push(Container::ListItem {
1728                content_col,
1729                buffer: ListItemBuffer::new(),
1730                marker_only: false, // The nested LIST counts as real content.
1731                virtual_marker_space,
1732            });
1733            // Open nested LIST inside the outer LIST_ITEM.
1734            builder.start_node(SyntaxKind::LIST.into());
1735            containers.push(Container::List {
1736                marker: inner_match.marker.clone(),
1737                base_indent_cols: content_col,
1738                has_blank_between_items: false,
1739            });
1740            // Emit nested LIST_ITEM via emit_list_item, then recurse on its
1741            // content for further-nested same-line markers.
1742            let inner_item = ListItemEmissionInput {
1743                content: text_to_buffer.as_str(),
1744                marker_len: inner_match.marker_len,
1745                spaces_after_cols: inner_match.spaces_after_cols,
1746                spaces_after_bytes: inner_match.spaces_after_bytes,
1747                indent_cols: content_col,
1748                indent_bytes: 0,
1749                virtual_marker_space: inner_match.virtual_marker_space,
1750            };
1751            let (inner_content_col, inner_text_to_buffer) =
1752                emit_list_item(builder, &inner_item, config);
1753            // Recursive call is for nested same-line markers (`- - foo`);
1754            // the inner content doesn't begin with `>` so no BqDispatch can
1755            // propagate up. Discard the result.
1756            let _ = finish_list_item_with_optional_nested(
1757                containers,
1758                builder,
1759                inner_content_col,
1760                inner_text_to_buffer,
1761                inner_match.virtual_marker_space,
1762                config,
1763            );
1764            return ListItemFinish::Done;
1765        }
1766    }
1767
1768    // Same-line blockquote marker inside a list item: `1. > Blockquote`
1769    // opens a BLOCK_QUOTE inside the LIST_ITEM, with the post-marker text
1770    // becoming the first line of the blockquote's paragraph. Both
1771    // CommonMark and Pandoc-markdown agree on this shape (verified via
1772    // `pandoc -f commonmark` and `pandoc -f markdown`). The companion
1773    // arm in `format_list_item` emits the LIST_MARKER and the BLOCK_QUOTE
1774    // contents on the same output line so the round-trip stays
1775    // idempotent.
1776    if !buffered_is_thematic_break
1777        && text_to_buffer.starts_with('>')
1778        && !text_to_buffer.starts_with(">>")
1779    {
1780        let bytes = text_to_buffer.as_bytes();
1781        let has_trailing_space = bytes.get(1).copied() == Some(b' ');
1782        let content_offset = if has_trailing_space { 2 } else { 1 };
1783        let remaining = &text_to_buffer[content_offset..];
1784
1785        // Push outer ListItem with empty buffer; the inner BLOCK_QUOTE
1786        // counts as real content so `marker_only` is false.
1787        containers.push(Container::ListItem {
1788            content_col,
1789            buffer: ListItemBuffer::new(),
1790            marker_only: false,
1791            virtual_marker_space,
1792        });
1793
1794        // Open BLOCK_QUOTE node inside the LIST_ITEM and emit the marker.
1795        builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
1796        builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
1797        if has_trailing_space {
1798            builder.token(SyntaxKind::WHITESPACE.into(), " ");
1799        }
1800        containers.push(Container::BlockQuote {});
1801
1802        let trimmed = trim_end_newlines(remaining);
1803
1804        // If the BlockQuote content begins with another list marker
1805        // followed by real content, recursively open a nested LIST inside
1806        // the BLOCK_QUOTE. Both Pandoc-markdown and CommonMark agree:
1807        // `- > - foo` produces
1808        // `BulletList [BlockQuote [BulletList [[Plain "foo"]]]]`
1809        // (verified via `pandoc -f markdown` and `pandoc -f commonmark`).
1810        let inner_is_thematic_break =
1811            super::horizontal_rules::try_parse_horizontal_rule(trimmed).is_some();
1812        if !inner_is_thematic_break
1813            && let Some(inner_match) = try_parse_list_marker(remaining, config, OpenListHint::None)
1814        {
1815            let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1816            let after_inner = trim_end_newlines(remaining.get(inner_content_start..).unwrap_or(""));
1817            if !after_inner.is_empty() {
1818                let bq_content_col = content_col + content_offset;
1819                builder.start_node(SyntaxKind::LIST.into());
1820                containers.push(Container::List {
1821                    marker: inner_match.marker.clone(),
1822                    base_indent_cols: bq_content_col,
1823                    has_blank_between_items: false,
1824                });
1825                let inner_item = ListItemEmissionInput {
1826                    content: remaining,
1827                    marker_len: inner_match.marker_len,
1828                    spaces_after_cols: inner_match.spaces_after_cols,
1829                    spaces_after_bytes: inner_match.spaces_after_bytes,
1830                    indent_cols: bq_content_col,
1831                    indent_bytes: 0,
1832                    virtual_marker_space: inner_match.virtual_marker_space,
1833                };
1834                let (inner_content_col, inner_text_to_buffer) =
1835                    emit_list_item(builder, &inner_item, config);
1836                // Same as above: inner content doesn't start with `>` so no
1837                // BqDispatch can propagate.
1838                let _ = finish_list_item_with_optional_nested(
1839                    containers,
1840                    builder,
1841                    inner_content_col,
1842                    inner_text_to_buffer,
1843                    inner_match.virtual_marker_space,
1844                    config,
1845                );
1846                return ListItemFinish::Done;
1847            }
1848        }
1849
1850        // If there is content after `> `, hand it back to the caller so the
1851        // parser's block dispatcher can recognize block-level constructs
1852        // (HTML blocks, ATX headings, fenced code, …) instead of wrapping
1853        // the first line in a stray paragraph. Subsequent lines continue
1854        // via the parser's main loop (lazy continuation handles the
1855        // no-marker continuation line in cases like #292).
1856        if !trimmed.is_empty() {
1857            return ListItemFinish::BqDispatch {
1858                content: remaining.to_string(),
1859            };
1860        }
1861        return ListItemFinish::Done;
1862    }
1863
1864    let marker_only = text_to_buffer.trim().is_empty();
1865    let mut buffer = ListItemBuffer::new();
1866    if !text_to_buffer.is_empty() {
1867        buffer.push_text(text_to_buffer);
1868    }
1869    containers.push(Container::ListItem {
1870        content_col,
1871        buffer,
1872        marker_only,
1873        virtual_marker_space,
1874    });
1875    ListItemFinish::Done
1876}