Skip to main content

panache_parser/parser/blocks/
lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{
6    Container, ContainerStack, leading_indent, leading_indent_from,
7};
8use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
9use crate::parser::utils::list_item_buffer::ListItemBuffer;
10
/// Signal returned by `add_list_item` / `finish_list_item_with_optional_nested`
/// so the caller can decide how to handle leftover first-line content.
///
/// `BqDispatch` fires when the list item opens an inner BLOCK_QUOTE on the same
/// line (`- > <content>`) and the post-`> ` content is non-empty and not itself
/// a list marker. The caller is responsible for dispatching `content` through
/// the block parser (typically `Parser::parse_inner_content`) so block-level
/// constructs like HTML blocks or headings are recognized rather than wrapped
/// in a stray paragraph.
pub(in crate::parser) enum ListItemFinish {
    /// No leftover first-line content is handed back to the caller.
    Done,
    /// The item opened a block quote on its first line; `content` is the text
    /// after the `> ` that the caller still has to run through the block
    /// parser.
    BqDispatch { content: String },
}
24
/// The marker that introduces a list item, as classified by
/// `try_parse_list_marker`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ListMarker {
    /// Bullet marker; the payload is the marker character itself
    /// (`*`, `+` or `-` when produced by `try_parse_list_marker`).
    Bullet(char),
    /// Any ordered marker (decimal, `#.`, alphabetic, Roman, example).
    Ordered(OrderedMarker),
}
30
/// The numbering scheme of an ordered list marker, together with the text
/// that was written in the source and the delimiter style around it.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum OrderedMarker {
    /// Decimal number: `1.`, `1)` or `(1)`.
    Decimal {
        /// The digits exactly as written in the source (kept as text, not
        /// parsed into an integer).
        number: String,
        /// Delimiter style surrounding the number.
        style: ListDelimiter,
    },
    /// The `#.` auto-numbering marker.
    Hash,
    /// Single lowercase ASCII letter: `a.`, `a)` or `(a)`.
    LowerAlpha {
        /// The letter as written.
        letter: char,
        /// Delimiter style surrounding the letter.
        style: ListDelimiter,
    },
    /// Single uppercase ASCII letter: `A.`, `A)` or `(A)`.
    UpperAlpha {
        /// The letter as written.
        letter: char,
        /// Delimiter style surrounding the letter.
        style: ListDelimiter,
    },
    /// Lowercase Roman numeral: `ii.`, `ii)` or `(ii)`.
    LowerRoman {
        /// The numeral text as written.
        numeral: String,
        /// Delimiter style surrounding the numeral.
        style: ListDelimiter,
    },
    /// Uppercase Roman numeral: `II.`, `II)` or `(II)`.
    UpperRoman {
        /// The numeral text as written.
        numeral: String,
        /// Delimiter style surrounding the numeral.
        style: ListDelimiter,
    },
    /// Example-list marker: `(@)` or `(@label)`.
    Example {
        /// The label between `(@` and `)`, or `None` for a bare `(@)`.
        label: Option<String>,
    },
}
58
/// Punctuation that delimits an ordered list marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosing parentheses, as in `(1)`.
    Parens,
}
65
/// Context hint for marker detection: the kind of open alphabetic list (if
/// any) at the candidate line's indent column. Used to disambiguate
/// single-letter Roman candidates {i,v,x,I,V,X} from their letter
/// interpretation in Pandoc-dialect input. Pandoc parses `a. … h. … i. … j.`
/// as a single LowerAlpha list (the `i.` after the blank line continues as
/// the letter `i`, not as Roman numeral 1). Marker detection needs this
/// signal to make that classification in a single pass.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum OpenListHint {
    /// No open alphabetic list; single-letter Roman candidates stay Roman.
    /// This is the `Default` value.
    #[default]
    None,
    /// A lowercase alphabetic list is open; `i`, `v`, `x` are read as letters.
    LowerAlpha,
    /// An uppercase alphabetic list is open; `I`, `V`, `X` are read as letters.
    UpperAlpha,
}
80
/// Result of a successful `try_parse_list_marker` call: the classified marker
/// plus the measurements needed to locate the item's content on the line.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ListMarkerMatch {
    /// The classified marker.
    pub(crate) marker: ListMarker,
    /// Length of the marker text (delimiters included, indentation and
    /// trailing whitespace excluded). The detector also adds this value to
    /// the indent column to obtain the column where the marker ends.
    pub(crate) marker_len: usize,
    /// Column width of the whitespace attributed to the marker, as computed
    /// by `marker_spaces_after`.
    pub(crate) spaces_after_cols: usize,
    /// Number of literal whitespace bytes after the marker that belong to
    /// the marker rather than to the content.
    pub(crate) spaces_after_bytes: usize,
    /// True when CommonMark's "≥ 5 cols of post-marker whitespace → marker + 1
    /// virtual space; rest belongs to content" rule fired during marker
    /// detection. The marker's required 1 col of trailing space was virtually
    /// absorbed (typically from a tab) rather than consumed as a literal byte;
    /// the surplus whitespace is left in the post-marker text so block-level
    /// detection can recognize it as an indented code block.
    pub(crate) virtual_marker_space: bool,
}
95
/// Everything `emit_list_item` needs to emit the leading tokens of one list
/// item and to split off the text that follows the marker.
#[derive(Debug, Clone, Copy)]
pub(in crate::parser) struct ListItemEmissionInput<'a> {
    /// The line being emitted; all byte offsets below index into it.
    pub content: &'a str,
    /// Byte length of the marker text, starting at `indent_bytes`. Also
    /// counted as columns when the content column is computed.
    pub marker_len: usize,
    /// Column width of the whitespace attributed to the marker.
    pub spaces_after_cols: usize,
    /// Bytes of whitespace right after the marker that are emitted as a
    /// WHITESPACE token (none when 0).
    pub spaces_after_bytes: usize,
    /// Column width of the leading indentation.
    pub indent_cols: usize,
    /// Byte length of the leading indentation (emitted as WHITESPACE when > 0).
    pub indent_bytes: usize,
    /// Whether the marker's trailing space was virtual (see
    /// `ListMarkerMatch::virtual_marker_space`). Not read by `emit_list_item`
    /// itself.
    pub virtual_marker_space: bool,
}
106
/// Recognize a Roman numeral at the very start of `text`.
///
/// `uppercase` selects the alphabet: `IVXLCDM` when true, `ivxlcdm` when
/// false; mixed-case input therefore stops at the first letter of the other
/// case. On success the byte length of the numeral is returned (the numeral
/// is pure ASCII, so this is also its character count); the caller slices
/// the original text itself once the trailing delimiter has been confirmed.
///
/// The check is byte-level and performs no allocation, because it runs on
/// every candidate line. A prefix is accepted when all of these hold:
///
/// * it is non-empty, and if it is a single letter that letter is `I`, `V`
///   or `X` (other single letters are left to alphabetic list markers);
/// * `I`, `X`, `C` never repeat more than three times in a row and `V`, `L`,
///   `D` never repeat at all;
/// * whenever a smaller digit directly precedes a larger one, the pair is
///   one of `IV`, `IX`, `XL`, `XC`, `CD`, `CM`.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    // Numeric value of a canonical-upper Roman digit; 0 for anything else.
    fn digit_value(upper: u8) -> u32 {
        match upper {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    let in_alphabet = |b: u8| {
        if uppercase {
            matches!(b, b'I' | b'V' | b'X' | b'L' | b'C' | b'D' | b'M')
        } else {
            matches!(b, b'i' | b'v' | b'x' | b'l' | b'c' | b'd' | b'm')
        }
    };

    // Longest prefix made only of Roman digits of the requested case.
    let len = text.bytes().take_while(|&b| in_alphabet(b)).count();
    let digits = &text.as_bytes()[..len];

    match digits {
        [] => return None,
        // A lone letter is only Roman when it is one of the common ones;
        // clearing bit 0x20 folds ASCII lowercase onto uppercase.
        [only] if !matches!(only & !0x20, b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // Repetition limits, tracked as the length of the current run.
    let mut previous = 0u8;
    let mut streak = 0usize;
    for upper in digits.iter().map(|b| b & !0x20) {
        streak = if upper == previous { streak + 1 } else { 1 };
        previous = upper;
        let over_limit = match upper {
            b'I' | b'X' | b'C' => streak > 3,
            b'V' | b'L' | b'D' => streak > 1,
            _ => false,
        };
        if over_limit {
            return None;
        }
    }

    // Subtractive notation: an ascending adjacent pair must be one of the
    // six permitted combinations.
    let pairs_ok = digits.windows(2).all(|pair| {
        let (curr, next) = (pair[0] & !0x20, pair[1] & !0x20);
        digit_value(curr) >= digit_value(next)
            || matches!(
                (curr, next),
                (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
            )
    });

    pairs_ok.then_some(len)
}
197
198/// Compute (spaces_after_cols, spaces_after_bytes, virtual_marker_space) for a
199/// post-marker string starting at column `marker_end_col` of the source line.
200///
201/// Implements CommonMark §5.2 rule #2: when the effective column-width of the
202/// post-marker whitespace (counted with tabs expanding from `marker_end_col`)
203/// is ≥ 5 and there is non-empty content after it, the list item's content
204/// column is `marker_end_col + 1` (the marker plus exactly one — possibly
205/// virtual — space). The surplus whitespace is left in the post-marker text
206/// so block-level dispatch can recognize it as an indented code block.
207///
208/// In the rule case, when the first byte is a tab whose source-column span
209/// exceeds 1, no bytes are consumed (the tab stays in content) and
210/// `virtual_marker_space` is true. Otherwise the byte count describes the
211/// literal whitespace consumed as marker space.
212fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
213    let (effective_cols, n_bytes) = leading_indent_from(after_marker, marker_end_col);
214    let after_ws = &after_marker[n_bytes..];
215    let has_content = !trim_end_newlines(after_ws).is_empty();
216    if has_content && effective_cols >= 5 {
217        let bytes = match after_marker.as_bytes().first() {
218            Some(b' ') => 1,
219            Some(b'\t') => {
220                let span = 4 - (marker_end_col % 4);
221                if span == 1 { 1 } else { 0 }
222            }
223            _ => 0,
224        };
225        (1, bytes, bytes == 0)
226    } else {
227        (effective_cols, n_bytes, false)
228    }
229}
230
231/// Pandoc-dialect single-pass disambiguation: when a single-letter Roman
232/// candidate `{i,v,x}` / `{I,V,X}` would shadow an open same-case alpha
233/// list, reject the Roman classification so detection falls through to the
234/// alpha branch. `numeral_bytes` is the buffer the Roman parser just
235/// validated; `len` is its byte-length. The check fires only for `len == 1`
236/// (multi-character romans like `ii.` are unambiguously Roman) and only in
237/// Pandoc dialect.
238fn single_char_roman_shadowed_by_alpha(
239    numeral_bytes: &[u8],
240    len: usize,
241    uppercase: bool,
242    hint: OpenListHint,
243    dialect: crate::Dialect,
244) -> bool {
245    if dialect != crate::Dialect::Pandoc || len != 1 {
246        return false;
247    }
248    match (uppercase, hint) {
249        (false, OpenListHint::LowerAlpha) => {
250            matches!(numeral_bytes[0], b'i' | b'v' | b'x')
251        }
252        (true, OpenListHint::UpperAlpha) => {
253            matches!(numeral_bytes[0], b'I' | b'V' | b'X')
254        }
255        _ => false,
256    }
257}
258
259pub(crate) fn try_parse_list_marker(
260    line: &str,
261    config: &ParserOptions,
262    open_alpha_hint: OpenListHint,
263) -> Option<ListMarkerMatch> {
264    // Trailing newlines should not block bare-marker detection; the line `*\n`
265    // is a bare bullet marker and the post-marker text is logically empty.
266    let line = trim_end_newlines(line);
267    let (_indent_cols, indent_bytes) = leading_indent(line);
268    let trimmed = &line[indent_bytes..];
269
270    // Try bullet markers (including task lists)
271    if let Some(ch) = trimmed.chars().next()
272        && matches!(ch, '*' | '+' | '-')
273    {
274        let after_marker = &trimmed[1..];
275
276        // Check for task list: [ ] or [x] or [X]
277        let trimmed_after = after_marker.trim_start();
278        let is_task = trimmed_after.starts_with('[')
279            && trimmed_after.len() >= 3
280            && matches!(
281                trimmed_after.chars().nth(1),
282                Some(' ') | Some('x') | Some('X')
283            )
284            && trimmed_after.chars().nth(2) == Some(']');
285
286        // Must be followed by whitespace (or be task list)
287        if after_marker.starts_with(' ')
288            || after_marker.starts_with('\t')
289            || after_marker.is_empty()
290            || is_task
291        {
292            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
293                marker_spaces_after(after_marker, _indent_cols + 1);
294            return Some(ListMarkerMatch {
295                marker: ListMarker::Bullet(ch),
296                marker_len: 1,
297                spaces_after_cols,
298                spaces_after_bytes,
299                virtual_marker_space,
300            });
301        }
302    }
303
304    // Try ordered markers
305    if config.extensions.fancy_lists
306        && let Some(after_marker) = trimmed.strip_prefix("#.")
307        && (after_marker.starts_with(' ')
308            || after_marker.starts_with('\t')
309            || after_marker.is_empty())
310    {
311        let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
312            marker_spaces_after(after_marker, _indent_cols + 2);
313        return Some(ListMarkerMatch {
314            marker: ListMarker::Ordered(OrderedMarker::Hash),
315            marker_len: 2,
316            spaces_after_cols,
317            spaces_after_bytes,
318            virtual_marker_space,
319        });
320    }
321
322    // Try example lists: (@) or (@label)
323    if config.extensions.example_lists
324        && let Some(rest) = trimmed.strip_prefix("(@")
325    {
326        // Check if it has a label or is just (@)
327        let label_end = rest
328            .chars()
329            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
330            .count();
331
332        // Must be followed by ')'
333        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
334            let label = if label_end > 0 {
335                Some(rest[..label_end].to_string())
336            } else {
337                None
338            };
339
340            let after_marker = &rest[label_end + 1..];
341            if after_marker.starts_with(' ')
342                || after_marker.starts_with('\t')
343                || after_marker.is_empty()
344            {
345                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
346                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
347                    marker_spaces_after(after_marker, _indent_cols + marker_len);
348                return Some(ListMarkerMatch {
349                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
350                    marker_len,
351                    spaces_after_cols,
352                    spaces_after_bytes,
353                    virtual_marker_space,
354                });
355            }
356        }
357    }
358
359    // Try parenthesized markers: (2), (a), (ii)
360    if let Some(rest) = trimmed.strip_prefix('(') {
361        if config.extensions.fancy_lists {
362            // Try decimal: (2)
363            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
364            if digit_count > 0
365                && rest.len() > digit_count
366                && rest.chars().nth(digit_count) == Some(')')
367            {
368                let number = &rest[..digit_count];
369                let after_marker = &rest[digit_count + 1..];
370                if after_marker.starts_with(' ')
371                    || after_marker.starts_with('\t')
372                    || after_marker.is_empty()
373                {
374                    let marker_len = 2 + digit_count;
375                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
376                        marker_spaces_after(after_marker, _indent_cols + marker_len);
377                    return Some(ListMarkerMatch {
378                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
379                            number: number.to_string(),
380                            style: ListDelimiter::Parens,
381                        }),
382                        marker_len,
383                        spaces_after_cols,
384                        spaces_after_bytes,
385                        virtual_marker_space,
386                    });
387                }
388            }
389        }
390
391        // Try fancy lists if enabled (parenthesized markers)
392        if config.extensions.fancy_lists {
393            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
394
395            // Try lowercase Roman: (ii)
396            if let Some(len) = try_parse_roman_numeral(rest, false)
397                && rest.len() > len
398                && rest.as_bytes()[len] == b')'
399                && !single_char_roman_shadowed_by_alpha(
400                    rest.as_bytes(),
401                    len,
402                    false,
403                    open_alpha_hint,
404                    config.dialect,
405                )
406            {
407                let after_marker = &rest[len + 1..];
408                if after_marker.starts_with(' ')
409                    || after_marker.starts_with('\t')
410                    || after_marker.is_empty()
411                {
412                    let marker_len = len + 2;
413                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
414                        marker_spaces_after(after_marker, _indent_cols + marker_len);
415                    return Some(ListMarkerMatch {
416                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
417                            numeral: rest[..len].to_string(),
418                            style: ListDelimiter::Parens,
419                        }),
420                        marker_len,
421                        spaces_after_cols,
422                        spaces_after_bytes,
423                        virtual_marker_space,
424                    });
425                }
426            }
427
428            // Try uppercase Roman: (II)
429            if let Some(len) = try_parse_roman_numeral(rest, true)
430                && rest.len() > len
431                && rest.as_bytes()[len] == b')'
432                && !single_char_roman_shadowed_by_alpha(
433                    rest.as_bytes(),
434                    len,
435                    true,
436                    open_alpha_hint,
437                    config.dialect,
438                )
439            {
440                let after_marker = &rest[len + 1..];
441                if after_marker.starts_with(' ')
442                    || after_marker.starts_with('\t')
443                    || after_marker.is_empty()
444                {
445                    let marker_len = len + 2;
446                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
447                        marker_spaces_after(after_marker, _indent_cols + marker_len);
448                    return Some(ListMarkerMatch {
449                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
450                            numeral: rest[..len].to_string(),
451                            style: ListDelimiter::Parens,
452                        }),
453                        marker_len,
454                        spaces_after_cols,
455                        spaces_after_bytes,
456                        virtual_marker_space,
457                    });
458                }
459            }
460
461            // Try lowercase letter: (a)
462            if let Some(ch) = rest.chars().next()
463                && ch.is_ascii_lowercase()
464                && rest.len() > 1
465                && rest.chars().nth(1) == Some(')')
466            {
467                let after_marker = &rest[2..];
468                if after_marker.starts_with(' ')
469                    || after_marker.starts_with('\t')
470                    || after_marker.is_empty()
471                {
472                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
473                        marker_spaces_after(after_marker, _indent_cols + 3);
474                    return Some(ListMarkerMatch {
475                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
476                            letter: ch,
477                            style: ListDelimiter::Parens,
478                        }),
479                        marker_len: 3,
480                        spaces_after_cols,
481                        spaces_after_bytes,
482                        virtual_marker_space,
483                    });
484                }
485            }
486
487            // Try uppercase letter: (A)
488            if let Some(ch) = rest.chars().next()
489                && ch.is_ascii_uppercase()
490                && rest.len() > 1
491                && rest.chars().nth(1) == Some(')')
492            {
493                let after_marker = &rest[2..];
494                if after_marker.starts_with(' ')
495                    || after_marker.starts_with('\t')
496                    || after_marker.is_empty()
497                {
498                    let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
499                        marker_spaces_after(after_marker, _indent_cols + 3);
500                    return Some(ListMarkerMatch {
501                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
502                            letter: ch,
503                            style: ListDelimiter::Parens,
504                        }),
505                        marker_len: 3,
506                        spaces_after_cols,
507                        spaces_after_bytes,
508                        virtual_marker_space,
509                    });
510                }
511            }
512        }
513    }
514
515    // Try decimal numbers: 1. or 1)
516    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
517    if digit_count > 0 && trimmed.len() > digit_count {
518        // CommonMark restricts ordered list markers to 1-9 digits (spec §5.2).
519        // Pandoc-markdown accepts arbitrary digit counts.
520        if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
521            return None;
522        }
523
524        let number = &trimmed[..digit_count];
525        let delim = trimmed.chars().nth(digit_count);
526
527        let (style, marker_len) = match delim {
528            Some('.') => (ListDelimiter::Period, digit_count + 1),
529            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
530            _ => return None,
531        };
532        // CommonMark §5.2: decimal `1)` markers are part of the core grammar.
533        // Pandoc-markdown gates `)`-style ordered markers behind `fancy_lists`.
534        if style == ListDelimiter::RightParen
535            && !config.extensions.fancy_lists
536            && config.dialect != crate::Dialect::CommonMark
537        {
538            return None;
539        }
540
541        let after_marker = &trimmed[marker_len..];
542        if after_marker.starts_with(' ')
543            || after_marker.starts_with('\t')
544            || after_marker.is_empty()
545        {
546            let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
547                marker_spaces_after(after_marker, _indent_cols + marker_len);
548            return Some(ListMarkerMatch {
549                marker: ListMarker::Ordered(OrderedMarker::Decimal {
550                    number: number.to_string(),
551                    style,
552                }),
553                marker_len,
554                spaces_after_cols,
555                spaces_after_bytes,
556                virtual_marker_space,
557            });
558        }
559    }
560
561    // Try fancy lists if enabled (non-parenthesized)
562    if config.extensions.fancy_lists {
563        // Try Roman numerals first, as they may overlap with letters
564
565        // Try lowercase Roman: i. or ii)
566        if let Some(len) = try_parse_roman_numeral(trimmed, false)
567            && trimmed.len() > len
568            && let delim = trimmed.as_bytes()[len]
569            && (delim == b'.' || delim == b')')
570            && !single_char_roman_shadowed_by_alpha(
571                trimmed.as_bytes(),
572                len,
573                false,
574                open_alpha_hint,
575                config.dialect,
576            )
577        {
578            let style = if delim == b'.' {
579                ListDelimiter::Period
580            } else {
581                ListDelimiter::RightParen
582            };
583            let marker_len = len + 1;
584
585            let after_marker = &trimmed[marker_len..];
586            if after_marker.starts_with(' ')
587                || after_marker.starts_with('\t')
588                || after_marker.is_empty()
589            {
590                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
591                    marker_spaces_after(after_marker, _indent_cols + marker_len);
592                return Some(ListMarkerMatch {
593                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
594                        numeral: trimmed[..len].to_string(),
595                        style,
596                    }),
597                    marker_len,
598                    spaces_after_cols,
599                    spaces_after_bytes,
600                    virtual_marker_space,
601                });
602            }
603        }
604
605        // Try uppercase Roman: I. or II)
606        if let Some(len) = try_parse_roman_numeral(trimmed, true)
607            && trimmed.len() > len
608            && let delim = trimmed.as_bytes()[len]
609            && (delim == b'.' || delim == b')')
610            && !single_char_roman_shadowed_by_alpha(
611                trimmed.as_bytes(),
612                len,
613                true,
614                open_alpha_hint,
615                config.dialect,
616            )
617        {
618            let style = if delim == b'.' {
619                ListDelimiter::Period
620            } else {
621                ListDelimiter::RightParen
622            };
623            let marker_len = len + 1;
624
625            let after_marker = &trimmed[marker_len..];
626            // Pandoc: single-character uppercase Roman (I, V, X, L, C, D, M)
627            // followed by `.` requires two spaces, to avoid confusion with
628            // initials like "I. M. Pei". Multi-character romans (II., XII.,
629            // …) and the right-paren form (I)) only need one space. See
630            // pandoc/src/Text/Pandoc/Readers/Markdown.hs `orderedListStart`.
631            let min_spaces = if delim == b'.' && len == 1 { 2 } else { 1 };
632            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
633
634            if (after_marker.starts_with(' ')
635                || after_marker.starts_with('\t')
636                || after_marker.is_empty())
637                && (after_marker.is_empty() || effective_cols >= min_spaces)
638            {
639                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
640                    marker_spaces_after(after_marker, _indent_cols + marker_len);
641                return Some(ListMarkerMatch {
642                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
643                        numeral: trimmed[..len].to_string(),
644                        style,
645                    }),
646                    marker_len,
647                    spaces_after_cols,
648                    spaces_after_bytes,
649                    virtual_marker_space,
650                });
651            }
652        }
653
654        // Try lowercase letter: a. or a)
655        if let Some(ch) = trimmed.chars().next()
656            && ch.is_ascii_lowercase()
657            && trimmed.len() > 1
658            && let Some(delim) = trimmed.chars().nth(1)
659            && (delim == '.' || delim == ')')
660        {
661            let style = if delim == '.' {
662                ListDelimiter::Period
663            } else {
664                ListDelimiter::RightParen
665            };
666            let marker_len = 2;
667
668            let after_marker = &trimmed[marker_len..];
669            if after_marker.starts_with(' ')
670                || after_marker.starts_with('\t')
671                || after_marker.is_empty()
672            {
673                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
674                    marker_spaces_after(after_marker, _indent_cols + marker_len);
675                return Some(ListMarkerMatch {
676                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
677                    marker_len,
678                    spaces_after_cols,
679                    spaces_after_bytes,
680                    virtual_marker_space,
681                });
682            }
683        }
684
685        // Try uppercase letter: A. or A)
686        if let Some(ch) = trimmed.chars().next()
687            && ch.is_ascii_uppercase()
688            && trimmed.len() > 1
689            && let Some(delim) = trimmed.chars().nth(1)
690            && (delim == '.' || delim == ')')
691        {
692            let style = if delim == '.' {
693                ListDelimiter::Period
694            } else {
695                ListDelimiter::RightParen
696            };
697            let marker_len = 2;
698
699            let after_marker = &trimmed[marker_len..];
700            // Special rule: uppercase letter with period needs 2 spaces minimum
701            let min_spaces = if delim == '.' { 2 } else { 1 };
702            let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
703
704            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
705                && effective_cols >= min_spaces
706            {
707                let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
708                    marker_spaces_after(after_marker, _indent_cols + marker_len);
709                return Some(ListMarkerMatch {
710                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
711                    marker_len,
712                    spaces_after_cols,
713                    spaces_after_bytes,
714                    virtual_marker_space,
715                });
716            }
717        }
718    }
719
720    None
721}
722
723pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
724    match (a, b) {
725        // CommonMark §5.3: bullet list markers `-`, `+`, `*` are *distinct*
726        // bullet types — switching from one to another starts a new list.
727        // Pandoc-markdown treats them as interchangeable: any bullet
728        // continues an open bullet list. Verified with pandoc against
729        // `- foo\n- bar\n+ baz\n` (#301).
730        (ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
731            crate::Dialect::CommonMark => ca == cb,
732            _ => true,
733        },
734        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
735            true
736        }
737        (
738            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
739            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
740        ) => s1 == s2,
741        (
742            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
743            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
744        ) => s1 == s2,
745        (
746            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
747            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
748        ) => s1 == s2,
749        (
750            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
751            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
752        ) => s1 == s2,
753        (
754            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
755            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
756        ) => s1 == s2,
757        (
758            ListMarker::Ordered(OrderedMarker::Example { .. }),
759            ListMarker::Ordered(OrderedMarker::Example { .. }),
760        ) => true, // All example list items match each other
761        _ => false,
762    }
763}
764
765/// Emit a list item node to the builder (marker and whitespace only).
766/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
767/// added to the list item buffer for later inline parsing.
768pub(in crate::parser) fn emit_list_item(
769    builder: &mut GreenNodeBuilder<'static>,
770    item: &ListItemEmissionInput<'_>,
771) -> (usize, String) {
772    builder.start_node(SyntaxKind::LIST_ITEM.into());
773
774    // Emit leading indentation for lossless parsing
775    if item.indent_bytes > 0 {
776        builder.token(
777            SyntaxKind::WHITESPACE.into(),
778            &item.content[..item.indent_bytes],
779        );
780    }
781
782    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
783    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
784
785    if item.spaces_after_bytes > 0 {
786        let space_start = item.indent_bytes + item.marker_len;
787        let space_end = space_start + item.spaces_after_bytes;
788        if space_end <= item.content.len() {
789            builder.token(
790                SyntaxKind::WHITESPACE.into(),
791                &item.content[space_start..space_end],
792            );
793        }
794    }
795
796    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
797    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
798
799    // Extract text content to be buffered (instead of emitting it directly).
800    // If the item starts with a task checkbox, emit it as a dedicated token so it
801    // doesn't get parsed as a link.
802    let text_to_buffer = if content_start < item.content.len() {
803        let rest = &item.content[content_start..];
804        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
805            && rest
806                .as_bytes()
807                .get(3)
808                .is_some_and(|b| (*b as char).is_whitespace())
809        {
810            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
811            rest[3..].to_string()
812        } else {
813            rest.to_string()
814        }
815    } else {
816        String::new()
817    };
818
819    (content_col, text_to_buffer)
820}
821
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    /// Build a container stack holding a single open list with `marker` at
    /// base indent 0.
    fn stack_with_list(marker: ListMarker) -> ContainerStack {
        let mut stack = ContainerStack::new();
        stack.stack.push(Container::List {
            marker,
            base_indent_cols: 0,
            has_blank_between_items: false,
        });
        stack
    }

    /// Marker of an open `a.` list.
    fn lower_alpha_a_period() -> ListMarker {
        ListMarker::Ordered(OrderedMarker::LowerAlpha {
            letter: 'a',
            style: ListDelimiter::Period,
        })
    }

    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config, OpenListHint::None).is_some());
        }
    }

    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        // Lowercase alpha with a period, then with a right paren.
        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(line.as_str(), &config, OpenListHint::None).is_some(),
                "{marker} should parse"
            );
        }
    }

    #[test]
    fn single_letter_i_classified_as_alpha_with_lower_alpha_hint() {
        let config = ParserOptions::default(); // Pandoc + fancy_lists
        let m = try_parse_list_marker("i. foo", &config, OpenListHint::LowerAlpha).unwrap();
        match &m.marker {
            ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: 'i', .. }) => {}
            other => panic!(
                "i. should classify as LowerAlpha when a LowerAlpha list is open: got {:?}",
                other
            ),
        }
    }

    #[test]
    fn single_letter_i_classified_as_roman_with_no_hint() {
        let config = ParserOptions::default();
        let m = try_parse_list_marker("i. foo", &config, OpenListHint::None).unwrap();
        match &m.marker {
            ListMarker::Ordered(OrderedMarker::LowerRoman { .. }) => {}
            other => panic!(
                "i. should classify as LowerRoman with no hint: got {:?}",
                other
            ),
        }
    }

    #[test]
    fn multichar_roman_ignores_hint() {
        let config = ParserOptions::default();
        let m = try_parse_list_marker("ii. foo", &config, OpenListHint::LowerAlpha).unwrap();
        match &m.marker {
            ListMarker::Ordered(OrderedMarker::LowerRoman { .. }) => {}
            other => panic!(
                "ii. must stay LowerRoman regardless of hint: got {:?}",
                other
            ),
        }
    }

    #[test]
    fn hint_ignored_in_commonmark_dialect() {
        // CommonMark doesn't enable fancy_lists, so `i.` isn't recognized as
        // an ordered marker at all in that dialect. The hint must not change
        // that outcome.
        let extensions = crate::options::Extensions {
            fancy_lists: false,
            ..Default::default()
        };
        let config = ParserOptions {
            dialect: crate::Dialect::CommonMark,
            extensions,
            ..Default::default()
        };
        let parsed = try_parse_list_marker("i. foo", &config, OpenListHint::LowerAlpha);
        assert!(
            parsed.is_none(),
            "i. should not parse as a list marker under CommonMark"
        );
    }

    #[test]
    fn uppercase_i_classified_as_alpha_with_upper_alpha_hint() {
        let config = ParserOptions::default();
        // Uppercase + period requires 2 spaces (the I.M.Pei rule).
        let m = try_parse_list_marker("I.  foo", &config, OpenListHint::UpperAlpha).unwrap();
        match &m.marker {
            ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: 'I', .. }) => {}
            other => panic!(
                "I. should classify as UpperAlpha when an UpperAlpha list is open: got {:?}",
                other
            ),
        }
    }

    #[test]
    fn lowercase_hint_does_not_shadow_uppercase_candidate() {
        let config = ParserOptions::default();
        let m = try_parse_list_marker("I.  foo", &config, OpenListHint::LowerAlpha).unwrap();
        match &m.marker {
            ListMarker::Ordered(OrderedMarker::UpperRoman { .. }) => {}
            other => panic!(
                "I. + LowerAlpha hint must stay UpperRoman (case mismatch): got {:?}",
                other
            ),
        }
    }

    #[test]
    fn parenthesized_single_letter_i_obeys_hint() {
        let config = ParserOptions::default();
        let m = try_parse_list_marker("(i) foo", &config, OpenListHint::LowerAlpha).unwrap();
        match &m.marker {
            ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: 'i', .. }) => {}
            other => panic!(
                "(i) should classify as LowerAlpha when a LowerAlpha list is open: got {:?}",
                other
            ),
        }
    }

    #[test]
    fn open_list_hint_at_indent_lower_alpha_at_same_indent() {
        let stack = stack_with_list(lower_alpha_a_period());
        assert_eq!(
            open_list_hint_at_indent(&stack, 0),
            OpenListHint::LowerAlpha
        );
    }

    #[test]
    fn open_list_hint_at_indent_returns_none_when_indent_differs() {
        // Protects nested-roman-inside-alpha: an `i.` at indent 3 must NOT
        // be reclassified against the outer alpha at indent 0.
        let stack = stack_with_list(lower_alpha_a_period());
        assert_eq!(open_list_hint_at_indent(&stack, 3), OpenListHint::None);
    }

    #[test]
    fn open_list_hint_at_indent_returns_none_for_decimal_or_roman() {
        let decimal = ListMarker::Ordered(OrderedMarker::Decimal {
            number: "1".to_string(),
            style: ListDelimiter::Period,
        });
        let roman = ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: "i".to_string(),
            style: ListDelimiter::Period,
        });

        for marker in [decimal, roman] {
            let stack = stack_with_list(marker);
            assert_eq!(open_list_hint_at_indent(&stack, 0), OpenListHint::None);
        }
    }

    #[test]
    fn open_list_hint_at_indent_stops_at_blockquote_barrier() {
        let mut stack = stack_with_list(lower_alpha_a_period());
        stack.stack.push(Container::BlockQuote {});
        // Inside the blockquote at indent 0: the outer alpha must not leak in.
        assert_eq!(open_list_hint_at_indent(&stack, 0), OpenListHint::None);
    }
}
1046
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::{Period, RightParen};

    // Small constructors keep the cases below readable.
    let lower_alpha = |letter: char, style: ListDelimiter| {
        ListMarker::Ordered(OrderedMarker::LowerAlpha { letter, style })
    };
    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: Period,
        })
    };

    // Same marker type and delimiter style: items belong to one list.
    let a_period = lower_alpha('a', Period);
    let same_alpha = markers_match(&a_period, &lower_alpha('b', Period), crate::Dialect::Pandoc);
    assert!(same_alpha, "a. and b. should match");

    let same_roman = markers_match(
        &lower_roman("i"),
        &lower_roman("ii"),
        crate::Dialect::Pandoc,
    );
    assert!(same_roman, "i. and ii. should match");

    // Same marker type but a different delimiter style: no match.
    let mixed_style = markers_match(
        &a_period,
        &lower_alpha('a', RightParen),
        crate::Dialect::Pandoc,
    );
    assert!(!mixed_style, "a. and a) should not match");
}
1090
#[test]
fn markers_match_bullet_dialect_split() {
    // (first bullet, second bullet, dialect, expected `markers_match` result)
    let cases = [
        // Pandoc: any bullet matches any bullet (same list).
        ('-', '+', crate::Dialect::Pandoc, true),
        // CommonMark: bullets match only when the marker character is the same.
        ('-', '-', crate::Dialect::CommonMark, true),
        ('-', '+', crate::Dialect::CommonMark, false),
        ('*', '-', crate::Dialect::CommonMark, false),
    ];

    for (first, second, dialect, expected) in cases {
        let matched = markers_match(
            &ListMarker::Bullet(first),
            &ListMarker::Bullet(second),
            dialect,
        );
        assert!(matched == expected);
    }
}
1117
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    // Each of these lowercase Roman numerals must be accepted as a marker.
    for numeral in ["iv", "v", "vi", "vii", "viii", "ix", "x"] {
        let line = format!("{numeral}. item");
        assert!(
            try_parse_list_marker(line.as_str(), &config, OpenListHint::None).is_some(),
            "{numeral}. should parse"
        );
    }
}
1153
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    // The unlabeled form first, then labeled examples.
    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(line.as_str(), &config, OpenListHint::None).is_some(),
            "{marker} should parse"
        );
    }

    // With the extension switched off the marker must be rejected.
    let extensions = crate::options::Extensions {
        example_lists: false,
        ..Default::default()
    };
    let disabled_config = ParserOptions {
        extensions,
        ..Default::default()
    };
    let parsed = try_parse_list_marker("(@) item", &disabled_config, OpenListHint::None);
    assert!(
        parsed.is_none(),
        "(@) should not parse when extension disabled"
    );
}
1192
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};
    use crate::parser::utils::list_item_buffer::ListItemBuffer;

    let roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let open_list = |base_indent_cols: usize| Container::List {
        marker: roman("ii"),
        base_indent_cols,
        has_blank_between_items: false,
    };

    // Stack: list at 8 > its item > list at 6.
    let mut containers = ContainerStack::new();
    containers.push(open_list(8));
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(open_list(6));

    // With deep ordered drift (indent 7), we should keep the enclosing level
    // (base indent 8), not re-associate to the nearest lower sibling level (6).
    let level = find_matching_list_level(&containers, &roman("iii"), 7, crate::Dialect::Pandoc);
    assert_eq!(level, Some(0));
}
1235
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    // Two open lists with the same marker kind, at base indents 8 and 6.
    let mut containers = ContainerStack::new();
    for base_indent_cols in [8, 6] {
        containers.push(Container::List {
            marker: roman("ii"),
            base_indent_cols,
            has_blank_between_items: false,
        });
    }

    // An item at exactly indent 6 must pick the list whose base indent is 6.
    let level = find_matching_list_level(&containers, &roman("iii"), 6, crate::Dialect::Pandoc);
    assert_eq!(level, Some(1));
}
1270
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();

    // Every bullet character must work as the nested marker.
    let cases = [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")];

    for (input, desc) in cases {
        // `parse` hands back the DOCUMENT node itself.
        let tree = parse(input, Some(config.clone()));
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT > LIST
        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node");
        };

        // DOCUMENT > LIST > LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        // The outer item must hold a nested LIST rather than PLAIN with TEXT.
        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            );
        };

        // ... > LIST (nested) > LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        // The nested item is empty: it must not carry PLAIN content.
        let no_plain = nested_item
            .children()
            .all(|n| n.kind() != SyntaxKind::PLAIN);
        assert!(
            no_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
1328
1329// Helper functions for list management in Parser
1330
1331/// Check if we're in any list.
1332pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
1333    containers
1334        .stack
1335        .iter()
1336        .any(|c| matches!(c, Container::List { .. }))
1337}
1338
1339/// Check if we're in a list inside a blockquote.
1340pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
1341    let mut seen_blockquote = false;
1342    for c in &containers.stack {
1343        if matches!(c, Container::BlockQuote { .. }) {
1344            seen_blockquote = true;
1345        }
1346        if seen_blockquote && matches!(c, Container::List { .. }) {
1347            return true;
1348        }
1349    }
1350    false
1351}
1352
1353/// Return the kind of open alphabetic list at exactly `indent_cols`, if any.
1354///
1355/// Walks the container stack from deepest to shallowest, stopping at a
1356/// `Container::BlockQuote` barrier (mirrors `find_matching_list_level`'s
1357/// barrier behavior so a list outside a blockquote can't influence
1358/// classification inside one). Returns `OpenListHint::None` for any
1359/// non-alpha marker or when no list is open at the queried indent.
1360///
1361/// Used by `try_parse_list_marker` to disambiguate single-letter Roman
1362/// candidates {i,v,x,I,V,X} against an open alpha list in Pandoc dialect.
1363/// The exact-indent gate is what protects nested Roman-inside-alpha
1364/// sublists like `a.\n   i.` — there the inner `i.` lives at a deeper
1365/// indent than the outer alpha base, so this returns `None` and Roman
1366/// classification wins.
1367pub(in crate::parser) fn open_list_hint_at_indent(
1368    containers: &ContainerStack,
1369    indent_cols: usize,
1370) -> OpenListHint {
1371    for c in containers.stack.iter().rev() {
1372        if matches!(c, Container::BlockQuote { .. }) {
1373            return OpenListHint::None;
1374        }
1375        if let Container::List {
1376            marker,
1377            base_indent_cols,
1378            ..
1379        } = c
1380            && *base_indent_cols == indent_cols
1381        {
1382            return match marker {
1383                ListMarker::Ordered(OrderedMarker::LowerAlpha { .. }) => OpenListHint::LowerAlpha,
1384                ListMarker::Ordered(OrderedMarker::UpperAlpha { .. }) => OpenListHint::UpperAlpha,
1385                _ => OpenListHint::None,
1386            };
1387        }
1388    }
1389    OpenListHint::None
1390}
1391
/// Find matching list level for a marker with the given indent.
///
/// Returns the index into `containers.stack` of the open list that an item
/// with `marker` at `indent_cols` should join, or `None` when no open list
/// qualifies.
///
/// The stack is walked from deepest to shallowest and the walk stops at the
/// first `Container::BlockQuote`. Only lists whose marker satisfies
/// `markers_match(marker, list_marker, dialect)` are considered, and each must
/// also pass an indent tolerance check:
///
/// - both indents >= 4: ordered markers may differ by up to 3 columns in
///   either direction; other markers must satisfy
///   `base <= indent <= base + 3`;
/// - exactly one of the indents >= 4: ordered markers may differ by up to 3
///   columns in either direction; other markers never match;
/// - both indents < 4: the indents may differ by up to 3 columns.
///
/// Selection among the candidates:
///
/// 1. a candidate at exactly `indent_cols` is returned immediately;
/// 2. otherwise, for an ordered marker with `indent_cols >= 4`, the candidate
///    whose base indent is >= `indent_cols` with the smallest difference wins
///    (on equal difference the deeper one, seen first, is kept);
/// 3. otherwise the candidate with the smallest indent distance wins; on equal
///    distance one whose base indent is <= `indent_cols` replaces one whose
///    base indent is greater.
pub(in crate::parser) fn find_matching_list_level(
    containers: &ContainerStack,
    marker: &ListMarker,
    indent_cols: usize,
    dialect: crate::Dialect,
) -> Option<usize> {
    // Search from deepest (last) to shallowest (first)
    // But for shallow items (0-3 indent), prefer matching at the closest base indent
    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)

    // "Deep ordered" items get the extra nearest-enclosing preference below.
    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
    let mut best_above_match: Option<(usize, usize)> = None; // (index, delta = base - indent), ordered deep only

    for (i, c) in containers.stack.iter().enumerate().rev() {
        // BlockQuote acts as a list-continuation barrier. A list outside a
        // BlockQuote can't be continued from inside the BlockQuote — opening
        // a BlockQuote starts a new container "world". Without this stop,
        // `- intro\n\n  > - 0:` matches the outer `-` list and closes the
        // freshly-opened BlockQuote (issue #292). Pandoc-native treats the
        // inner list as a child of the BlockQuote.
        if matches!(c, Container::BlockQuote { .. }) {
            break;
        }
        if let Container::List {
            marker: list_marker,
            base_indent_cols,
            ..
        } = c
            && markers_match(marker, list_marker, dialect)
        {
            // Indent tolerance check; which rule applies depends on whether the
            // item and/or the list sit at indent >= 4.
            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
                // Deep indentation:
                // - bullets stay directional to preserve nesting boundaries
                // - ordered markers allow small symmetric drift to keep
                //   marker-width-aligned lists (i./ii./iii.) at one level
                match (marker, list_marker) {
                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
                        indent_cols.abs_diff(*base_indent_cols) <= 3
                    }
                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
                }
            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
                // One shallow, one deep:
                // - ordered markers still allow symmetric drift so aligned roman
                //   markers (e.g. 3/4/5 spaces for i./ii./iii.) stay at one level
                // - bullets remain directional to preserve nesting boundaries
                match (marker, list_marker) {
                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
                        indent_cols.abs_diff(*base_indent_cols) <= 3
                    }
                    _ => false,
                }
            } else {
                // Both at shallow indentation (0-3)
                // Allow items within 3 spaces
                indent_cols.abs_diff(*base_indent_cols) <= 3
            };

            if matches {
                let distance = indent_cols.abs_diff(*base_indent_cols);
                let base_leq_indent = *base_indent_cols <= indent_cols;

                // For deep ordered lists, avoid "nearest below" re-association caused by
                // formatter alignment shifts (e.g. i./ii./iii. becoming 6/7/8-space indents).
                // Prefer matching the nearest enclosing level whose base indent is >= current.
                if is_deep_ordered
                    && matches!(
                        (marker, list_marker),
                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
                    )
                    && *base_indent_cols >= indent_cols
                {
                    let delta = *base_indent_cols - indent_cols;
                    // Strict `<`: on a tie the candidate seen first (deeper) is kept.
                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
                        best_above_match = Some((i, delta));
                    }
                }

                // General best candidate: smaller distance wins; on a tie, a base
                // at or below the item's indent replaces one above it.
                if let Some((_, best_dist, best_base_leq)) = best_match {
                    if distance < best_dist
                        || (distance == best_dist && base_leq_indent && !best_base_leq)
                    {
                        best_match = Some((i, distance, base_leq_indent));
                    }
                } else {
                    best_match = Some((i, distance, base_leq_indent));
                }

                // If we found an exact match, return immediately
                if distance == 0 {
                    return Some(i);
                }
            }
        }
    }

    // The enclosing-level candidate (only ever set for deep ordered markers)
    // takes precedence over the plain nearest-distance candidate.
    if let Some((index, _)) = best_above_match {
        return Some(index);
    }

    best_match.map(|(i, _, _)| i)
}
1495
1496/// Start a nested list within an existing list item.
1497pub(in crate::parser) fn start_nested_list(
1498    containers: &mut ContainerStack,
1499    builder: &mut GreenNodeBuilder<'static>,
1500    marker: &ListMarker,
1501    item: &ListItemEmissionInput<'_>,
1502    indent_to_emit: Option<&str>,
1503    config: &ParserOptions,
1504) -> ListItemFinish {
1505    // Emit the indent if needed
1506    if let Some(indent_str) = indent_to_emit {
1507        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1508    }
1509
1510    // Start nested list
1511    builder.start_node(SyntaxKind::LIST.into());
1512    containers.push(Container::List {
1513        marker: marker.clone(),
1514        base_indent_cols: item.indent_cols,
1515        has_blank_between_items: false,
1516    });
1517
1518    // Add the nested list item
1519    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1520    finish_list_item_with_optional_nested(
1521        containers,
1522        builder,
1523        content_col,
1524        text_to_buffer,
1525        item.virtual_marker_space,
1526        config,
1527    )
1528}
1529
1530/// Checks if the content after a list marker is exactly another bullet marker.
1531/// Returns the nested bullet marker character if detected.
1532pub(in crate::parser) fn is_content_nested_bullet_marker(
1533    content: &str,
1534    marker_len: usize,
1535    spaces_after_bytes: usize,
1536) -> Option<char> {
1537    let (_, indent_bytes) = leading_indent(content);
1538    let content_start = indent_bytes + marker_len + spaces_after_bytes;
1539
1540    if content_start >= content.len() {
1541        return None;
1542    }
1543
1544    let remaining = &content[content_start..];
1545    let (text_part, _) = strip_newline(remaining);
1546    let trimmed = text_part.trim();
1547
1548    // Check if it's exactly one of the bullet marker characters
1549    if trimmed.len() == 1 {
1550        let ch = trimmed.chars().next().unwrap();
1551        if matches!(ch, '*' | '+' | '-') {
1552            return Some(ch);
1553        }
1554    }
1555
1556    None
1557}
1558
1559/// Add a list item that contains a nested empty list (for cases like `- *`).
1560/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
1561pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1562    containers: &mut ContainerStack,
1563    builder: &mut GreenNodeBuilder<'static>,
1564    item: &ListItemEmissionInput<'_>,
1565    nested_marker: char,
1566) {
1567    // First, emit the outer list item (just marker + whitespace)
1568    builder.start_node(SyntaxKind::LIST_ITEM.into());
1569
1570    // Emit leading indentation for lossless parsing
1571    if item.indent_bytes > 0 {
1572        builder.token(
1573            SyntaxKind::WHITESPACE.into(),
1574            &item.content[..item.indent_bytes],
1575        );
1576    }
1577
1578    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1579    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1580
1581    if item.spaces_after_bytes > 0 {
1582        let space_start = item.indent_bytes + item.marker_len;
1583        let space_end = space_start + item.spaces_after_bytes;
1584        if space_end <= item.content.len() {
1585            builder.token(
1586                SyntaxKind::WHITESPACE.into(),
1587                &item.content[space_start..space_end],
1588            );
1589        }
1590    }
1591
1592    // Now start the nested list inside this item
1593    builder.start_node(SyntaxKind::LIST.into());
1594
1595    // Add empty list item to the nested list
1596    builder.start_node(SyntaxKind::LIST_ITEM.into());
1597    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1598
1599    // Extract and emit the newline from original content (lossless)
1600    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1601    if content_start < item.content.len() {
1602        let remaining = &item.content[content_start..];
1603        // Skip the nested marker character (1 byte) and get the newline
1604        if remaining.len() > 1 {
1605            let (_, newline_str) = strip_newline(&remaining[1..]);
1606            if !newline_str.is_empty() {
1607                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1608            }
1609        }
1610    }
1611
1612    builder.finish_node(); // Close nested LIST_ITEM
1613    builder.finish_node(); // Close nested LIST
1614
1615    // Push container for the outer list item
1616    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1617    containers.push(Container::ListItem {
1618        content_col,
1619        buffer: ListItemBuffer::new(),
1620        marker_only: false, // The nested LIST counts as real content.
1621        virtual_marker_space: item.virtual_marker_space,
1622    });
1623}
1624
1625/// Add a list item to the current list.
1626pub(in crate::parser) fn add_list_item(
1627    containers: &mut ContainerStack,
1628    builder: &mut GreenNodeBuilder<'static>,
1629    item: &ListItemEmissionInput<'_>,
1630    config: &ParserOptions,
1631) -> ListItemFinish {
1632    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1633
1634    log::trace!(
1635        "add_list_item: content={:?}, text_to_buffer={:?}",
1636        item.content,
1637        text_to_buffer
1638    );
1639
1640    finish_list_item_with_optional_nested(
1641        containers,
1642        builder,
1643        content_col,
1644        text_to_buffer,
1645        item.virtual_marker_space,
1646        config,
1647    )
1648}
1649
1650/// Finish a list item by either buffering its content or, when the buffered
1651/// content begins with another list marker followed by content, recursively
1652/// opening a nested LIST with another LIST_ITEM. Pushes the appropriate
1653/// containers onto the stack so the caller doesn't need to.
1654fn finish_list_item_with_optional_nested(
1655    containers: &mut ContainerStack,
1656    builder: &mut GreenNodeBuilder<'static>,
1657    content_col: usize,
1658    text_to_buffer: String,
1659    virtual_marker_space: bool,
1660    config: &ParserOptions,
1661) -> ListItemFinish {
1662    // A line whose content is a thematic break (e.g. `* * *`) takes precedence
1663    // over being parsed as a sequence of nested list markers. Both dialects
1664    // agree: `- * * *` is a list item containing a thematic break, not a
1665    // chain of bullets.
1666    let buffered_is_thematic_break =
1667        super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
1668            .is_some();
1669
1670    // Recursive same-line nested list emission applies to both dialects:
1671    // pandoc-markdown and CommonMark agree on the nested LIST_ITEM shape
1672    // for `- - foo`, `1. - 2. foo`, etc. (verified via `pandoc -f markdown
1673    // -t native` and `pandoc -f commonmark -t native`). The companion
1674    // formatter arm in `format_list_item` handles the LIST-first-child
1675    // shape so the round-trip stays idempotent.
1676
1677    if !buffered_is_thematic_break
1678        && let Some(inner_match) =
1679            try_parse_list_marker(&text_to_buffer, config, OpenListHint::None)
1680    {
1681        let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1682        let after_inner =
1683            trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
1684        // Recurse only when there is real content after the inner marker.
1685        // The bare-inner-marker case (e.g. `- *`) is handled by the existing
1686        // `add_list_item_with_nested_empty_list` path.
1687        if !after_inner.is_empty() {
1688            // Push outer ListItem with empty buffer.
1689            containers.push(Container::ListItem {
1690                content_col,
1691                buffer: ListItemBuffer::new(),
1692                marker_only: false, // The nested LIST counts as real content.
1693                virtual_marker_space,
1694            });
1695            // Open nested LIST inside the outer LIST_ITEM.
1696            builder.start_node(SyntaxKind::LIST.into());
1697            containers.push(Container::List {
1698                marker: inner_match.marker.clone(),
1699                base_indent_cols: content_col,
1700                has_blank_between_items: false,
1701            });
1702            // Emit nested LIST_ITEM via emit_list_item, then recurse on its
1703            // content for further-nested same-line markers.
1704            let inner_item = ListItemEmissionInput {
1705                content: text_to_buffer.as_str(),
1706                marker_len: inner_match.marker_len,
1707                spaces_after_cols: inner_match.spaces_after_cols,
1708                spaces_after_bytes: inner_match.spaces_after_bytes,
1709                indent_cols: content_col,
1710                indent_bytes: 0,
1711                virtual_marker_space: inner_match.virtual_marker_space,
1712            };
1713            let (inner_content_col, inner_text_to_buffer) = emit_list_item(builder, &inner_item);
1714            // Recursive call is for nested same-line markers (`- - foo`);
1715            // the inner content doesn't begin with `>` so no BqDispatch can
1716            // propagate up. Discard the result.
1717            let _ = finish_list_item_with_optional_nested(
1718                containers,
1719                builder,
1720                inner_content_col,
1721                inner_text_to_buffer,
1722                inner_match.virtual_marker_space,
1723                config,
1724            );
1725            return ListItemFinish::Done;
1726        }
1727    }
1728
1729    // Same-line blockquote marker inside a list item: `1. > Blockquote`
1730    // opens a BLOCK_QUOTE inside the LIST_ITEM, with the post-marker text
1731    // becoming the first line of the blockquote's paragraph. Both
1732    // CommonMark and Pandoc-markdown agree on this shape (verified via
1733    // `pandoc -f commonmark` and `pandoc -f markdown`). The companion
1734    // arm in `format_list_item` emits the LIST_MARKER and the BLOCK_QUOTE
1735    // contents on the same output line so the round-trip stays
1736    // idempotent.
1737    if !buffered_is_thematic_break
1738        && text_to_buffer.starts_with('>')
1739        && !text_to_buffer.starts_with(">>")
1740    {
1741        let bytes = text_to_buffer.as_bytes();
1742        let has_trailing_space = bytes.get(1).copied() == Some(b' ');
1743        let content_offset = if has_trailing_space { 2 } else { 1 };
1744        let remaining = &text_to_buffer[content_offset..];
1745
1746        // Push outer ListItem with empty buffer; the inner BLOCK_QUOTE
1747        // counts as real content so `marker_only` is false.
1748        containers.push(Container::ListItem {
1749            content_col,
1750            buffer: ListItemBuffer::new(),
1751            marker_only: false,
1752            virtual_marker_space,
1753        });
1754
1755        // Open BLOCK_QUOTE node inside the LIST_ITEM and emit the marker.
1756        builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
1757        builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
1758        if has_trailing_space {
1759            builder.token(SyntaxKind::WHITESPACE.into(), " ");
1760        }
1761        containers.push(Container::BlockQuote {});
1762
1763        let trimmed = trim_end_newlines(remaining);
1764
1765        // If the BlockQuote content begins with another list marker
1766        // followed by real content, recursively open a nested LIST inside
1767        // the BLOCK_QUOTE. Both Pandoc-markdown and CommonMark agree:
1768        // `- > - foo` produces
1769        // `BulletList [BlockQuote [BulletList [[Plain "foo"]]]]`
1770        // (verified via `pandoc -f markdown` and `pandoc -f commonmark`).
1771        let inner_is_thematic_break =
1772            super::horizontal_rules::try_parse_horizontal_rule(trimmed).is_some();
1773        if !inner_is_thematic_break
1774            && let Some(inner_match) = try_parse_list_marker(remaining, config, OpenListHint::None)
1775        {
1776            let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
1777            let after_inner = trim_end_newlines(remaining.get(inner_content_start..).unwrap_or(""));
1778            if !after_inner.is_empty() {
1779                let bq_content_col = content_col + content_offset;
1780                builder.start_node(SyntaxKind::LIST.into());
1781                containers.push(Container::List {
1782                    marker: inner_match.marker.clone(),
1783                    base_indent_cols: bq_content_col,
1784                    has_blank_between_items: false,
1785                });
1786                let inner_item = ListItemEmissionInput {
1787                    content: remaining,
1788                    marker_len: inner_match.marker_len,
1789                    spaces_after_cols: inner_match.spaces_after_cols,
1790                    spaces_after_bytes: inner_match.spaces_after_bytes,
1791                    indent_cols: bq_content_col,
1792                    indent_bytes: 0,
1793                    virtual_marker_space: inner_match.virtual_marker_space,
1794                };
1795                let (inner_content_col, inner_text_to_buffer) =
1796                    emit_list_item(builder, &inner_item);
1797                // Same as above: inner content doesn't start with `>` so no
1798                // BqDispatch can propagate.
1799                let _ = finish_list_item_with_optional_nested(
1800                    containers,
1801                    builder,
1802                    inner_content_col,
1803                    inner_text_to_buffer,
1804                    inner_match.virtual_marker_space,
1805                    config,
1806                );
1807                return ListItemFinish::Done;
1808            }
1809        }
1810
1811        // If there is content after `> `, hand it back to the caller so the
1812        // parser's block dispatcher can recognize block-level constructs
1813        // (HTML blocks, ATX headings, fenced code, …) instead of wrapping
1814        // the first line in a stray paragraph. Subsequent lines continue
1815        // via the parser's main loop (lazy continuation handles the
1816        // no-marker continuation line in cases like #292).
1817        if !trimmed.is_empty() {
1818            return ListItemFinish::BqDispatch {
1819                content: remaining.to_string(),
1820            };
1821        }
1822        return ListItemFinish::Done;
1823    }
1824
1825    let marker_only = text_to_buffer.trim().is_empty();
1826    let mut buffer = ListItemBuffer::new();
1827    if !text_to_buffer.is_empty() {
1828        buffer.push_text(text_to_buffer);
1829    }
1830    containers.push(Container::ListItem {
1831        content_col,
1832        buffer,
1833        marker_only,
1834        virtual_marker_space,
1835    });
1836    ListItemFinish::Done
1837}