Skip to main content

panache_parser/parser/blocks/
lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{Container, ContainerStack, leading_indent};
6use crate::parser::utils::helpers::strip_newline;
7use crate::parser::utils::list_item_buffer::ListItemBuffer;
8
9#[derive(Debug, Clone, PartialEq)]
10pub(crate) enum ListMarker {
11    Bullet(char),
12    Ordered(OrderedMarker),
13}
14
15#[derive(Debug, Clone, PartialEq)]
16pub(crate) enum OrderedMarker {
17    Decimal {
18        number: String,
19        style: ListDelimiter,
20    },
21    Hash,
22    LowerAlpha {
23        letter: char,
24        style: ListDelimiter,
25    },
26    UpperAlpha {
27        letter: char,
28        style: ListDelimiter,
29    },
30    LowerRoman {
31        numeral: String,
32        style: ListDelimiter,
33    },
34    UpperRoman {
35        numeral: String,
36        style: ListDelimiter,
37    },
38    Example {
39        label: Option<String>,
40    },
41}
42
/// The punctuation that delimits an ordered-list marker.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ListDelimiter {
    /// A trailing period, as in `1.`.
    Period,
    /// A trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosing parentheses, as in `(1)`.
    Parens,
}
49
50#[derive(Debug, Clone, PartialEq)]
51pub(crate) struct ListMarkerMatch {
52    pub(crate) marker: ListMarker,
53    pub(crate) marker_len: usize,
54    pub(crate) spaces_after_cols: usize,
55    pub(crate) spaces_after_bytes: usize,
56}
57
58#[derive(Debug, Clone, Copy)]
59pub(in crate::parser) struct ListItemEmissionInput<'a> {
60    pub content: &'a str,
61    pub marker_len: usize,
62    pub spaces_after_cols: usize,
63    pub spaces_after_bytes: usize,
64    pub indent_cols: usize,
65    pub indent_bytes: usize,
66}
67
/// Recognises a Roman numeral at the start of `text`.
///
/// Only letters of the requested case are considered (`uppercase` selects
/// `IVXLCDM`, otherwise `ivxlcdm`). On success returns the numeral exactly as
/// written together with its length; returns `None` when the leading run of
/// Roman letters is empty or fails the basic well-formedness checks below.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<(String, usize)> {
    // Value of a Roman symbol in the requested case, or `None` for anything else.
    let symbol_value = |b: u8| -> Option<u32> {
        if b.is_ascii_uppercase() != uppercase {
            return None;
        }
        match b.to_ascii_uppercase() {
            b'I' => Some(1),
            b'V' => Some(5),
            b'X' => Some(10),
            b'L' => Some(50),
            b'C' => Some(100),
            b'D' => Some(500),
            b'M' => Some(1000),
            _ => None,
        }
    };

    // Values of the leading run of Roman letters; every symbol is one ASCII
    // byte, so the run's length is both a char count and a byte count.
    let values: Vec<u32> = text.bytes().map_while(symbol_value).collect();
    let len = values.len();

    if len == 0 {
        return None;
    }

    // A lone letter is only taken as a numeral for I, V and X; single L, C, D
    // and M are left to the alphabetic list markers.
    if len == 1 && !matches!(values[0], 1 | 5 | 10) {
        return None;
    }

    // I, X and C may repeat at most three times in a row; V, L and D never repeat.
    let over_repeated = values
        .windows(4)
        .any(|w| matches!(w, [1, 1, 1, 1] | [10, 10, 10, 10] | [100, 100, 100, 100]))
        || values
            .windows(2)
            .any(|w| matches!(w, [5, 5] | [50, 50] | [500, 500]));
    if over_repeated {
        return None;
    }

    // A smaller symbol directly before a larger one must be one of the six
    // subtractive pairs: IV, IX, XL, XC, CD, CM.
    let bad_subtraction = values.windows(2).any(|w| {
        w[0] < w[1] && !matches!(w, [1, 5 | 10] | [10, 50 | 100] | [100, 500 | 1000])
    });
    if bad_subtraction {
        return None;
    }

    Some((text[..len].to_string(), len))
}
157
158pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
159    // Trailing newlines should not block bare-marker detection; the line `*\n`
160    // is a bare bullet marker and the post-marker text is logically empty.
161    let line = line.trim_end_matches(['\r', '\n']);
162    let (_indent_cols, indent_bytes) = leading_indent(line);
163    let trimmed = &line[indent_bytes..];
164
165    // Try bullet markers (including task lists)
166    if let Some(ch) = trimmed.chars().next()
167        && matches!(ch, '*' | '+' | '-')
168    {
169        let after_marker = &trimmed[1..];
170
171        // Check for task list: [ ] or [x] or [X]
172        let trimmed_after = after_marker.trim_start();
173        let is_task = trimmed_after.starts_with('[')
174            && trimmed_after.len() >= 3
175            && matches!(
176                trimmed_after.chars().nth(1),
177                Some(' ') | Some('x') | Some('X')
178            )
179            && trimmed_after.chars().nth(2) == Some(']');
180
181        // Must be followed by whitespace (or be task list)
182        if after_marker.starts_with(' ')
183            || after_marker.starts_with('\t')
184            || after_marker.is_empty()
185            || is_task
186        {
187            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
188            return Some(ListMarkerMatch {
189                marker: ListMarker::Bullet(ch),
190                marker_len: 1,
191                spaces_after_cols,
192                spaces_after_bytes,
193            });
194        }
195    }
196
197    // Try ordered markers
198    if config.extensions.fancy_lists
199        && let Some(after_marker) = trimmed.strip_prefix("#.")
200        && (after_marker.starts_with(' ')
201            || after_marker.starts_with('\t')
202            || after_marker.is_empty())
203    {
204        let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
205        return Some(ListMarkerMatch {
206            marker: ListMarker::Ordered(OrderedMarker::Hash),
207            marker_len: 2,
208            spaces_after_cols,
209            spaces_after_bytes,
210        });
211    }
212
213    // Try example lists: (@) or (@label)
214    if config.extensions.example_lists
215        && let Some(rest) = trimmed.strip_prefix("(@")
216    {
217        // Check if it has a label or is just (@)
218        let label_end = rest
219            .chars()
220            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
221            .count();
222
223        // Must be followed by ')'
224        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
225            let label = if label_end > 0 {
226                Some(rest[..label_end].to_string())
227            } else {
228                None
229            };
230
231            let after_marker = &rest[label_end + 1..];
232            if after_marker.starts_with(' ')
233                || after_marker.starts_with('\t')
234                || after_marker.is_empty()
235            {
236                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
237                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
238                return Some(ListMarkerMatch {
239                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
240                    marker_len,
241                    spaces_after_cols,
242                    spaces_after_bytes,
243                });
244            }
245        }
246    }
247
248    // Try parenthesized markers: (2), (a), (ii)
249    if let Some(rest) = trimmed.strip_prefix('(') {
250        if config.extensions.fancy_lists {
251            // Try decimal: (2)
252            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
253            if digit_count > 0
254                && rest.len() > digit_count
255                && rest.chars().nth(digit_count) == Some(')')
256            {
257                let number = &rest[..digit_count];
258                let after_marker = &rest[digit_count + 1..];
259                if after_marker.starts_with(' ')
260                    || after_marker.starts_with('\t')
261                    || after_marker.is_empty()
262                {
263                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
264                    let marker_len = 2 + digit_count;
265                    return Some(ListMarkerMatch {
266                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
267                            number: number.to_string(),
268                            style: ListDelimiter::Parens,
269                        }),
270                        marker_len,
271                        spaces_after_cols,
272                        spaces_after_bytes,
273                    });
274                }
275            }
276        }
277
278        // Try fancy lists if enabled (parenthesized markers)
279        if config.extensions.fancy_lists {
280            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
281
282            // Try lowercase Roman: (ii)
283            if let Some((numeral, len)) = try_parse_roman_numeral(rest, false)
284                && rest.len() > len
285                && rest.chars().nth(len) == Some(')')
286            {
287                let after_marker = &rest[len + 1..];
288                if after_marker.starts_with(' ')
289                    || after_marker.starts_with('\t')
290                    || after_marker.is_empty()
291                {
292                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
293                    return Some(ListMarkerMatch {
294                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
295                            numeral,
296                            style: ListDelimiter::Parens,
297                        }),
298                        marker_len: len + 2,
299                        spaces_after_cols,
300                        spaces_after_bytes,
301                    });
302                }
303            }
304
305            // Try uppercase Roman: (II)
306            if let Some((numeral, len)) = try_parse_roman_numeral(rest, true)
307                && rest.len() > len
308                && rest.chars().nth(len) == Some(')')
309            {
310                let after_marker = &rest[len + 1..];
311                if after_marker.starts_with(' ')
312                    || after_marker.starts_with('\t')
313                    || after_marker.is_empty()
314                {
315                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
316                    return Some(ListMarkerMatch {
317                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
318                            numeral,
319                            style: ListDelimiter::Parens,
320                        }),
321                        marker_len: len + 2,
322                        spaces_after_cols,
323                        spaces_after_bytes,
324                    });
325                }
326            }
327
328            // Try lowercase letter: (a)
329            if let Some(ch) = rest.chars().next()
330                && ch.is_ascii_lowercase()
331                && rest.len() > 1
332                && rest.chars().nth(1) == Some(')')
333            {
334                let after_marker = &rest[2..];
335                if after_marker.starts_with(' ')
336                    || after_marker.starts_with('\t')
337                    || after_marker.is_empty()
338                {
339                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
340                    return Some(ListMarkerMatch {
341                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
342                            letter: ch,
343                            style: ListDelimiter::Parens,
344                        }),
345                        marker_len: 3,
346                        spaces_after_cols,
347                        spaces_after_bytes,
348                    });
349                }
350            }
351
352            // Try uppercase letter: (A)
353            if let Some(ch) = rest.chars().next()
354                && ch.is_ascii_uppercase()
355                && rest.len() > 1
356                && rest.chars().nth(1) == Some(')')
357            {
358                let after_marker = &rest[2..];
359                if after_marker.starts_with(' ')
360                    || after_marker.starts_with('\t')
361                    || after_marker.is_empty()
362                {
363                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
364                    return Some(ListMarkerMatch {
365                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
366                            letter: ch,
367                            style: ListDelimiter::Parens,
368                        }),
369                        marker_len: 3,
370                        spaces_after_cols,
371                        spaces_after_bytes,
372                    });
373                }
374            }
375        }
376    }
377
378    // Try decimal numbers: 1. or 1)
379    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
380    if digit_count > 0 && trimmed.len() > digit_count {
381        // CommonMark restricts ordered list markers to 1-9 digits (spec §5.2).
382        // Pandoc-markdown accepts arbitrary digit counts.
383        if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
384            return None;
385        }
386
387        let number = &trimmed[..digit_count];
388        let delim = trimmed.chars().nth(digit_count);
389
390        let (style, marker_len) = match delim {
391            Some('.') => (ListDelimiter::Period, digit_count + 1),
392            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
393            _ => return None,
394        };
395        // CommonMark §5.2: decimal `1)` markers are part of the core grammar.
396        // Pandoc-markdown gates `)`-style ordered markers behind `fancy_lists`.
397        if style == ListDelimiter::RightParen
398            && !config.extensions.fancy_lists
399            && config.dialect != crate::Dialect::CommonMark
400        {
401            return None;
402        }
403
404        let after_marker = &trimmed[marker_len..];
405        if after_marker.starts_with(' ')
406            || after_marker.starts_with('\t')
407            || after_marker.is_empty()
408        {
409            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
410            return Some(ListMarkerMatch {
411                marker: ListMarker::Ordered(OrderedMarker::Decimal {
412                    number: number.to_string(),
413                    style,
414                }),
415                marker_len,
416                spaces_after_cols,
417                spaces_after_bytes,
418            });
419        }
420    }
421
422    // Try fancy lists if enabled (non-parenthesized)
423    if config.extensions.fancy_lists {
424        // Try Roman numerals first, as they may overlap with letters
425
426        // Try lowercase Roman: i. or ii)
427        if let Some((numeral, len)) = try_parse_roman_numeral(trimmed, false)
428            && trimmed.len() > len
429            && let Some(delim) = trimmed.chars().nth(len)
430            && (delim == '.' || delim == ')')
431        {
432            let style = if delim == '.' {
433                ListDelimiter::Period
434            } else {
435                ListDelimiter::RightParen
436            };
437            let marker_len = len + 1;
438
439            let after_marker = &trimmed[marker_len..];
440            if after_marker.starts_with(' ')
441                || after_marker.starts_with('\t')
442                || after_marker.is_empty()
443            {
444                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
445                return Some(ListMarkerMatch {
446                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman { numeral, style }),
447                    marker_len,
448                    spaces_after_cols,
449                    spaces_after_bytes,
450                });
451            }
452        }
453
454        // Try uppercase Roman: I. or II)
455        if let Some((numeral, len)) = try_parse_roman_numeral(trimmed, true)
456            && trimmed.len() > len
457            && let Some(delim) = trimmed.chars().nth(len)
458            && (delim == '.' || delim == ')')
459        {
460            let style = if delim == '.' {
461                ListDelimiter::Period
462            } else {
463                ListDelimiter::RightParen
464            };
465            let marker_len = len + 1;
466
467            let after_marker = &trimmed[marker_len..];
468            if after_marker.starts_with(' ')
469                || after_marker.starts_with('\t')
470                || after_marker.is_empty()
471            {
472                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
473                return Some(ListMarkerMatch {
474                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman { numeral, style }),
475                    marker_len,
476                    spaces_after_cols,
477                    spaces_after_bytes,
478                });
479            }
480        }
481
482        // Try lowercase letter: a. or a)
483        if let Some(ch) = trimmed.chars().next()
484            && ch.is_ascii_lowercase()
485            && trimmed.len() > 1
486            && let Some(delim) = trimmed.chars().nth(1)
487            && (delim == '.' || delim == ')')
488        {
489            let style = if delim == '.' {
490                ListDelimiter::Period
491            } else {
492                ListDelimiter::RightParen
493            };
494            let marker_len = 2;
495
496            let after_marker = &trimmed[marker_len..];
497            if after_marker.starts_with(' ')
498                || after_marker.starts_with('\t')
499                || after_marker.is_empty()
500            {
501                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
502                return Some(ListMarkerMatch {
503                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
504                    marker_len,
505                    spaces_after_cols,
506                    spaces_after_bytes,
507                });
508            }
509        }
510
511        // Try uppercase letter: A. or A)
512        if let Some(ch) = trimmed.chars().next()
513            && ch.is_ascii_uppercase()
514            && trimmed.len() > 1
515            && let Some(delim) = trimmed.chars().nth(1)
516            && (delim == '.' || delim == ')')
517        {
518            let style = if delim == '.' {
519                ListDelimiter::Period
520            } else {
521                ListDelimiter::RightParen
522            };
523            let marker_len = 2;
524
525            let after_marker = &trimmed[marker_len..];
526            // Special rule: uppercase letter with period needs 2 spaces minimum
527            let min_spaces = if delim == '.' { 2 } else { 1 };
528            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
529
530            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
531                && spaces_after_cols >= min_spaces
532            {
533                return Some(ListMarkerMatch {
534                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
535                    marker_len,
536                    spaces_after_cols,
537                    spaces_after_bytes,
538                });
539            }
540        }
541    }
542
543    None
544}
545
546pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker) -> bool {
547    match (a, b) {
548        // All bullet list markers (-, *, +) are considered matching (Pandoc behavior)
549        (ListMarker::Bullet(_), ListMarker::Bullet(_)) => true,
550        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
551            true
552        }
553        (
554            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
555            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
556        ) => s1 == s2,
557        (
558            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
559            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
560        ) => s1 == s2,
561        (
562            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
563            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
564        ) => s1 == s2,
565        (
566            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
567            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
568        ) => s1 == s2,
569        (
570            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
571            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
572        ) => s1 == s2,
573        (
574            ListMarker::Ordered(OrderedMarker::Example { .. }),
575            ListMarker::Ordered(OrderedMarker::Example { .. }),
576        ) => true, // All example list items match each other
577        _ => false,
578    }
579}
580
581/// Emit a list item node to the builder (marker and whitespace only).
582/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
583/// added to the list item buffer for later inline parsing.
584pub(in crate::parser) fn emit_list_item(
585    builder: &mut GreenNodeBuilder<'static>,
586    item: &ListItemEmissionInput<'_>,
587) -> (usize, String) {
588    builder.start_node(SyntaxKind::LIST_ITEM.into());
589
590    // Emit leading indentation for lossless parsing
591    if item.indent_bytes > 0 {
592        builder.token(
593            SyntaxKind::WHITESPACE.into(),
594            &item.content[..item.indent_bytes],
595        );
596    }
597
598    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
599    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
600
601    if item.spaces_after_bytes > 0 {
602        let space_start = item.indent_bytes + item.marker_len;
603        let space_end = space_start + item.spaces_after_bytes;
604        if space_end <= item.content.len() {
605            builder.token(
606                SyntaxKind::WHITESPACE.into(),
607                &item.content[space_start..space_end],
608            );
609        }
610    }
611
612    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
613    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
614
615    // Extract text content to be buffered (instead of emitting it directly).
616    // If the item starts with a task checkbox, emit it as a dedicated token so it
617    // doesn't get parsed as a link.
618    let text_to_buffer = if content_start < item.content.len() {
619        let rest = &item.content[content_start..];
620        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
621            && rest
622                .as_bytes()
623                .get(3)
624                .is_some_and(|b| (*b as char).is_whitespace())
625        {
626            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
627            rest[3..].to_string()
628        } else {
629            rest.to_string()
630        }
631    } else {
632        String::new()
633    };
634
635    (content_col, text_to_buffer)
636}
637
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// A bullet is recognised whether a space or a tab follows it.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` on, lowercase letters work with both `.` and `)`.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        // Period-delimited first, then right-paren-delimited.
        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(&line, &config).is_some(),
                "{marker} should parse"
            );
        }
    }
}
680
/// Fancy-list markers match on kind and delimiter style, not on their value.
#[test]
fn markers_match_fancy_lists() {
    let lower_alpha = |letter: char, style: ListDelimiter| {
        ListMarker::Ordered(OrderedMarker::LowerAlpha { letter, style })
    };
    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    // Same kind and style match even though the values differ.
    let a_period = lower_alpha('a', ListDelimiter::Period);
    assert!(
        markers_match(&a_period, &lower_alpha('b', ListDelimiter::Period)),
        "a. and b. should match"
    );
    assert!(
        markers_match(&lower_roman("i"), &lower_roman("ii")),
        "i. and ii. should match"
    );

    // Same kind with a different delimiter style does not match.
    assert!(
        !markers_match(&a_period, &lower_alpha('a', ListDelimiter::RightParen)),
        "a. and a) should not match"
    );
}
724
/// With `fancy_lists` on, the lowercase Roman numerals four through ten are
/// recognised as list markers.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    for numeral in ["iv", "v", "vi", "vii", "viii", "ix", "x"] {
        let line = format!("{numeral}. item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{numeral}. should parse"
        );
    }
}
760
/// Example-list markers parse when `example_lists` is on, with or without a
/// label, and are rejected when it is off.
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    // The unlabeled form first, then labels with letters, `_`, `-` and digits.
    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }

    // Same input with the extension switched off.
    let disabled_config = ParserOptions {
        extensions: crate::options::Extensions {
            example_lists: false,
            ..Default::default()
        },
        ..Default::default()
    };
    let without_extension = try_parse_list_marker("(@) item", &disabled_config);
    assert!(
        without_extension.is_none(),
        "(@) should not parse when extension disabled"
    );
}
799
/// With an open list at indent 8, an item inside it, and a nested list at
/// indent 6, a new matching marker at indent 7 resolves to the outer list.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 8,
        has_blank_between_items: false,
    });
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
    });
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 6,
        has_blank_between_items: false,
    });

    // With deep ordered drift (indent 7), we should keep the enclosing level
    // (base indent 8), not re-associate to the nearest lower sibling level (6).
    let level = find_matching_list_level(&containers, &lower_roman("iii"), 7);
    assert_eq!(level, Some(0));
}
839
/// When a list with exactly the new marker's indent is open, that list is the
/// one chosen.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 8,
        has_blank_between_items: false,
    });
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 6,
        has_blank_between_items: false,
    });

    // Indent 6 is exactly the base indent of the second list pushed above.
    let level = find_matching_list_level(&containers, &lower_roman("iii"), 6);
    assert_eq!(level, Some(1));
}
873
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let options = ParserOptions::default();

    // Each of the three bullet characters is tried as the sole content of an
    // outer `-` item; every case must produce a nested list.
    let cases = [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")];

    for (input, desc) in cases {
        let document = parse(input, Some(options.clone()));

        // The value returned by `parse` is the DOCUMENT node itself.
        assert_eq!(
            document.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT -> LIST
        let Some(outer_list) = document
            .children()
            .find(|child| child.kind() == SyntaxKind::LIST)
        else {
            panic!("{desc}: should have outer LIST node");
        };

        // LIST -> LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|child| child.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        // LIST_ITEM -> nested LIST (rather than PLAIN with TEXT)
        let Some(nested_list) = outer_item
            .children()
            .find(|child| child.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            );
        };

        // nested LIST -> LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|child| child.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        // The inner item carries no PLAIN child: it is an empty item.
        let lacks_plain = nested_item
            .children()
            .all(|child| child.kind() != SyntaxKind::PLAIN);
        assert!(
            lacks_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
931
932// Helper functions for list management in Parser
933
934/// Check if we're in any list.
935pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
936    containers
937        .stack
938        .iter()
939        .any(|c| matches!(c, Container::List { .. }))
940}
941
942/// Check if we're in a list inside a blockquote.
943pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
944    let mut seen_blockquote = false;
945    for c in &containers.stack {
946        if matches!(c, Container::BlockQuote { .. }) {
947            seen_blockquote = true;
948        }
949        if seen_blockquote && matches!(c, Container::List { .. }) {
950            return true;
951        }
952    }
953    false
954}
955
956/// Find matching list level for a marker with the given indent.
957pub(in crate::parser) fn find_matching_list_level(
958    containers: &ContainerStack,
959    marker: &ListMarker,
960    indent_cols: usize,
961) -> Option<usize> {
962    // Search from deepest (last) to shallowest (first)
963    // But for shallow items (0-3 indent), prefer matching at the closest base indent
964    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)
965
966    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
967    let mut best_above_match: Option<(usize, usize)> = None; // (index, delta = base - indent), ordered deep only
968
969    for (i, c) in containers.stack.iter().enumerate().rev() {
970        if let Container::List {
971            marker: list_marker,
972            base_indent_cols,
973            ..
974        } = c
975            && markers_match(marker, list_marker)
976        {
977            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
978                // Deep indentation:
979                // - bullets stay directional to preserve nesting boundaries
980                // - ordered markers allow small symmetric drift to keep
981                //   marker-width-aligned lists (i./ii./iii.) at one level
982                match (marker, list_marker) {
983                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
984                        indent_cols.abs_diff(*base_indent_cols) <= 3
985                    }
986                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
987                }
988            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
989                // One shallow, one deep:
990                // - ordered markers still allow symmetric drift so aligned roman
991                //   markers (e.g. 3/4/5 spaces for i./ii./iii.) stay at one level
992                // - bullets remain directional to preserve nesting boundaries
993                match (marker, list_marker) {
994                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
995                        indent_cols.abs_diff(*base_indent_cols) <= 3
996                    }
997                    _ => false,
998                }
999            } else {
1000                // Both at shallow indentation (0-3)
1001                // Allow items within 3 spaces
1002                indent_cols.abs_diff(*base_indent_cols) <= 3
1003            };
1004
1005            if matches {
1006                let distance = indent_cols.abs_diff(*base_indent_cols);
1007                let base_leq_indent = *base_indent_cols <= indent_cols;
1008
1009                // For deep ordered lists, avoid "nearest below" re-association caused by
1010                // formatter alignment shifts (e.g. i./ii./iii. becoming 6/7/8-space indents).
1011                // Prefer matching the nearest enclosing level whose base indent is >= current.
1012                if is_deep_ordered
1013                    && matches!(
1014                        (marker, list_marker),
1015                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
1016                    )
1017                    && *base_indent_cols >= indent_cols
1018                {
1019                    let delta = *base_indent_cols - indent_cols;
1020                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
1021                        best_above_match = Some((i, delta));
1022                    }
1023                }
1024
1025                if let Some((_, best_dist, best_base_leq)) = best_match {
1026                    if distance < best_dist
1027                        || (distance == best_dist && base_leq_indent && !best_base_leq)
1028                    {
1029                        best_match = Some((i, distance, base_leq_indent));
1030                    }
1031                } else {
1032                    best_match = Some((i, distance, base_leq_indent));
1033                }
1034
1035                // If we found an exact match, return immediately
1036                if distance == 0 {
1037                    return Some(i);
1038                }
1039            }
1040        }
1041    }
1042
1043    if let Some((index, _)) = best_above_match {
1044        return Some(index);
1045    }
1046
1047    best_match.map(|(i, _, _)| i)
1048}
1049
1050/// Start a nested list within an existing list item.
1051pub(in crate::parser) fn start_nested_list(
1052    containers: &mut ContainerStack,
1053    builder: &mut GreenNodeBuilder<'static>,
1054    marker: &ListMarker,
1055    item: &ListItemEmissionInput<'_>,
1056    indent_to_emit: Option<&str>,
1057) {
1058    // Emit the indent if needed
1059    if let Some(indent_str) = indent_to_emit {
1060        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1061    }
1062
1063    // Start nested list
1064    builder.start_node(SyntaxKind::LIST.into());
1065    containers.push(Container::List {
1066        marker: marker.clone(),
1067        base_indent_cols: item.indent_cols,
1068        has_blank_between_items: false,
1069    });
1070
1071    // Add the nested list item
1072    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1073    let mut buffer = ListItemBuffer::new();
1074    if !text_to_buffer.is_empty() {
1075        buffer.push_text(text_to_buffer);
1076    }
1077    containers.push(Container::ListItem {
1078        content_col,
1079        buffer,
1080    });
1081}
1082
1083/// Checks if the content after a list marker is exactly another bullet marker.
1084/// Returns the nested bullet marker character if detected.
1085pub(in crate::parser) fn is_content_nested_bullet_marker(
1086    content: &str,
1087    marker_len: usize,
1088    spaces_after_bytes: usize,
1089) -> Option<char> {
1090    let (_, indent_bytes) = leading_indent(content);
1091    let content_start = indent_bytes + marker_len + spaces_after_bytes;
1092
1093    if content_start >= content.len() {
1094        return None;
1095    }
1096
1097    let remaining = &content[content_start..];
1098    let (text_part, _) = strip_newline(remaining);
1099    let trimmed = text_part.trim();
1100
1101    // Check if it's exactly one of the bullet marker characters
1102    if trimmed.len() == 1 {
1103        let ch = trimmed.chars().next().unwrap();
1104        if matches!(ch, '*' | '+' | '-') {
1105            return Some(ch);
1106        }
1107    }
1108
1109    None
1110}
1111
1112/// Add a list item that contains a nested empty list (for cases like `- *`).
1113/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
1114pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1115    containers: &mut ContainerStack,
1116    builder: &mut GreenNodeBuilder<'static>,
1117    item: &ListItemEmissionInput<'_>,
1118    nested_marker: char,
1119) {
1120    // First, emit the outer list item (just marker + whitespace)
1121    builder.start_node(SyntaxKind::LIST_ITEM.into());
1122
1123    // Emit leading indentation for lossless parsing
1124    if item.indent_bytes > 0 {
1125        builder.token(
1126            SyntaxKind::WHITESPACE.into(),
1127            &item.content[..item.indent_bytes],
1128        );
1129    }
1130
1131    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1132    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1133
1134    if item.spaces_after_bytes > 0 {
1135        let space_start = item.indent_bytes + item.marker_len;
1136        let space_end = space_start + item.spaces_after_bytes;
1137        if space_end <= item.content.len() {
1138            builder.token(
1139                SyntaxKind::WHITESPACE.into(),
1140                &item.content[space_start..space_end],
1141            );
1142        }
1143    }
1144
1145    // Now start the nested list inside this item
1146    builder.start_node(SyntaxKind::LIST.into());
1147
1148    // Add empty list item to the nested list
1149    builder.start_node(SyntaxKind::LIST_ITEM.into());
1150    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1151
1152    // Extract and emit the newline from original content (lossless)
1153    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1154    if content_start < item.content.len() {
1155        let remaining = &item.content[content_start..];
1156        // Skip the nested marker character (1 byte) and get the newline
1157        if remaining.len() > 1 {
1158            let (_, newline_str) = strip_newline(&remaining[1..]);
1159            if !newline_str.is_empty() {
1160                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1161            }
1162        }
1163    }
1164
1165    builder.finish_node(); // Close nested LIST_ITEM
1166    builder.finish_node(); // Close nested LIST
1167
1168    // Push container for the outer list item
1169    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1170    containers.push(Container::ListItem {
1171        content_col,
1172        buffer: ListItemBuffer::new(),
1173    });
1174}
1175
1176/// Add a list item to the current list.
1177pub(in crate::parser) fn add_list_item(
1178    containers: &mut ContainerStack,
1179    builder: &mut GreenNodeBuilder<'static>,
1180    item: &ListItemEmissionInput<'_>,
1181) {
1182    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1183
1184    log::trace!(
1185        "add_list_item: content={:?}, text_to_buffer={:?}",
1186        item.content,
1187        text_to_buffer
1188    );
1189
1190    let mut buffer = ListItemBuffer::new();
1191    if !text_to_buffer.is_empty() {
1192        buffer.push_text(text_to_buffer);
1193    }
1194    containers.push(Container::ListItem {
1195        content_col,
1196        buffer,
1197    });
1198}