// panache_parser/parser/blocks/lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{Container, ContainerStack, leading_indent};
6use crate::parser::utils::helpers::strip_newline;
7use crate::parser::utils::list_item_buffer::ListItemBuffer;
8
9#[derive(Debug, Clone, PartialEq)]
10pub(crate) enum ListMarker {
11    Bullet(char),
12    Ordered(OrderedMarker),
13}
14
15#[derive(Debug, Clone, PartialEq)]
16pub(crate) enum OrderedMarker {
17    Decimal {
18        number: String,
19        style: ListDelimiter,
20    },
21    Hash,
22    LowerAlpha {
23        letter: char,
24        style: ListDelimiter,
25    },
26    UpperAlpha {
27        letter: char,
28        style: ListDelimiter,
29    },
30    LowerRoman {
31        numeral: String,
32        style: ListDelimiter,
33    },
34    UpperRoman {
35        numeral: String,
36        style: ListDelimiter,
37    },
38    Example {
39        label: Option<String>,
40    },
41}
42
/// How an ordered-list marker is delimited from the item text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period: `1.`
    Period,
    /// Trailing right parenthesis: `1)`
    RightParen,
    /// Enclosing parentheses: `(1)`
    Parens,
}
49
50#[derive(Debug, Clone, PartialEq)]
51pub(crate) struct ListMarkerMatch {
52    pub(crate) marker: ListMarker,
53    pub(crate) marker_len: usize,
54    pub(crate) spaces_after_cols: usize,
55    pub(crate) spaces_after_bytes: usize,
56}
57
58#[derive(Debug, Clone, Copy)]
59pub(in crate::parser) struct ListItemEmissionInput<'a> {
60    pub content: &'a str,
61    pub marker_len: usize,
62    pub spaces_after_cols: usize,
63    pub spaces_after_bytes: usize,
64    pub indent_cols: usize,
65    pub indent_bytes: usize,
66}
67
/// Parse a Roman numeral at the start of `text`.
///
/// Only letters of the requested case are consumed: `IVXLCDM` when `uppercase` is
/// true, `ivxlcdm` otherwise. On success returns the numeral exactly as written and
/// its length (the numeral is ASCII, so character count and byte count coincide).
///
/// Returns `None` when:
/// - `text` does not start with a Roman digit of the requested case;
/// - the numeral is a single `L`, `C`, `D`, or `M` (too easily confused with an
///   alphabetic list marker);
/// - it contains `IIII`, `XXXX`, `CCCC`, `VV`, `LL`, or `DD`;
/// - a smaller digit precedes a larger one in anything other than the subtractive
///   pairs `IV`, `IX`, `XL`, `XC`, `CD`, `CM`.
///
/// This is a basic plausibility check, not a full Roman-numeral grammar.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<(String, usize)> {
    /// Numeric value of an uppercase Roman digit; 0 for any other byte.
    fn digit_value(digit: u8) -> u32 {
        match digit {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    // Repetitions that never occur in a well-formed numeral.
    const FORBIDDEN_RUNS: [&str; 6] = ["IIII", "XXXX", "CCCC", "VV", "LL", "DD"];

    let digits = if uppercase { "IVXLCDM" } else { "ivxlcdm" };
    let count = text.chars().take_while(|c| digits.contains(*c)).count();
    if count == 0 {
        return None;
    }

    // Every consumed character is a one-byte ASCII letter, so `count` is a valid
    // byte offset.
    let numeral = &text[..count];
    let normalized = numeral.to_ascii_uppercase();

    // A lone digit is only accepted for the common I / V / X.
    if count == 1 && !matches!(normalized.as_str(), "I" | "V" | "X") {
        return None;
    }

    if FORBIDDEN_RUNS.iter().any(|run| normalized.contains(*run)) {
        return None;
    }

    // A smaller digit may precede a larger one only in the six subtractive pairs.
    let has_bad_subtraction = normalized.as_bytes().windows(2).any(|pair| {
        let (curr, next) = (pair[0], pair[1]);
        digit_value(curr) < digit_value(next)
            && !matches!(
                (curr, next),
                (b'I', b'V')
                    | (b'I', b'X')
                    | (b'X', b'L')
                    | (b'X', b'C')
                    | (b'C', b'D')
                    | (b'C', b'M')
            )
    });
    if has_bad_subtraction {
        return None;
    }

    Some((numeral.to_string(), count))
}
157
158pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
159    let (_indent_cols, indent_bytes) = leading_indent(line);
160    let trimmed = &line[indent_bytes..];
161
162    // Try bullet markers (including task lists)
163    if let Some(ch) = trimmed.chars().next()
164        && matches!(ch, '*' | '+' | '-')
165    {
166        let after_marker = &trimmed[1..];
167
168        // Check for task list: [ ] or [x] or [X]
169        let trimmed_after = after_marker.trim_start();
170        let is_task = trimmed_after.starts_with('[')
171            && trimmed_after.len() >= 3
172            && matches!(
173                trimmed_after.chars().nth(1),
174                Some(' ') | Some('x') | Some('X')
175            )
176            && trimmed_after.chars().nth(2) == Some(']');
177
178        // Must be followed by whitespace (or be task list)
179        if after_marker.starts_with(' ')
180            || after_marker.starts_with('\t')
181            || after_marker.is_empty()
182            || is_task
183        {
184            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
185            return Some(ListMarkerMatch {
186                marker: ListMarker::Bullet(ch),
187                marker_len: 1,
188                spaces_after_cols,
189                spaces_after_bytes,
190            });
191        }
192    }
193
194    // Try ordered markers
195    if config.extensions.fancy_lists
196        && let Some(after_marker) = trimmed.strip_prefix("#.")
197        && (after_marker.starts_with(' ')
198            || after_marker.starts_with('\t')
199            || after_marker.is_empty())
200    {
201        let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
202        return Some(ListMarkerMatch {
203            marker: ListMarker::Ordered(OrderedMarker::Hash),
204            marker_len: 2,
205            spaces_after_cols,
206            spaces_after_bytes,
207        });
208    }
209
210    // Try example lists: (@) or (@label)
211    if config.extensions.example_lists
212        && let Some(rest) = trimmed.strip_prefix("(@")
213    {
214        // Check if it has a label or is just (@)
215        let label_end = rest
216            .chars()
217            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
218            .count();
219
220        // Must be followed by ')'
221        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
222            let label = if label_end > 0 {
223                Some(rest[..label_end].to_string())
224            } else {
225                None
226            };
227
228            let after_marker = &rest[label_end + 1..];
229            if after_marker.starts_with(' ')
230                || after_marker.starts_with('\t')
231                || after_marker.is_empty()
232            {
233                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
234                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
235                return Some(ListMarkerMatch {
236                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
237                    marker_len,
238                    spaces_after_cols,
239                    spaces_after_bytes,
240                });
241            }
242        }
243    }
244
245    // Try parenthesized markers: (2), (a), (ii)
246    if let Some(rest) = trimmed.strip_prefix('(') {
247        if config.extensions.fancy_lists {
248            // Try decimal: (2)
249            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
250            if digit_count > 0
251                && rest.len() > digit_count
252                && rest.chars().nth(digit_count) == Some(')')
253            {
254                let number = &rest[..digit_count];
255                let after_marker = &rest[digit_count + 1..];
256                if after_marker.starts_with(' ')
257                    || after_marker.starts_with('\t')
258                    || after_marker.is_empty()
259                {
260                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
261                    let marker_len = 2 + digit_count;
262                    return Some(ListMarkerMatch {
263                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
264                            number: number.to_string(),
265                            style: ListDelimiter::Parens,
266                        }),
267                        marker_len,
268                        spaces_after_cols,
269                        spaces_after_bytes,
270                    });
271                }
272            }
273        }
274
275        // Try fancy lists if enabled (parenthesized markers)
276        if config.extensions.fancy_lists {
277            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
278
279            // Try lowercase Roman: (ii)
280            if let Some((numeral, len)) = try_parse_roman_numeral(rest, false)
281                && rest.len() > len
282                && rest.chars().nth(len) == Some(')')
283            {
284                let after_marker = &rest[len + 1..];
285                if after_marker.starts_with(' ')
286                    || after_marker.starts_with('\t')
287                    || after_marker.is_empty()
288                {
289                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
290                    return Some(ListMarkerMatch {
291                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
292                            numeral,
293                            style: ListDelimiter::Parens,
294                        }),
295                        marker_len: len + 2,
296                        spaces_after_cols,
297                        spaces_after_bytes,
298                    });
299                }
300            }
301
302            // Try uppercase Roman: (II)
303            if let Some((numeral, len)) = try_parse_roman_numeral(rest, true)
304                && rest.len() > len
305                && rest.chars().nth(len) == Some(')')
306            {
307                let after_marker = &rest[len + 1..];
308                if after_marker.starts_with(' ')
309                    || after_marker.starts_with('\t')
310                    || after_marker.is_empty()
311                {
312                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
313                    return Some(ListMarkerMatch {
314                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
315                            numeral,
316                            style: ListDelimiter::Parens,
317                        }),
318                        marker_len: len + 2,
319                        spaces_after_cols,
320                        spaces_after_bytes,
321                    });
322                }
323            }
324
325            // Try lowercase letter: (a)
326            if let Some(ch) = rest.chars().next()
327                && ch.is_ascii_lowercase()
328                && rest.len() > 1
329                && rest.chars().nth(1) == Some(')')
330            {
331                let after_marker = &rest[2..];
332                if after_marker.starts_with(' ')
333                    || after_marker.starts_with('\t')
334                    || after_marker.is_empty()
335                {
336                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
337                    return Some(ListMarkerMatch {
338                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
339                            letter: ch,
340                            style: ListDelimiter::Parens,
341                        }),
342                        marker_len: 3,
343                        spaces_after_cols,
344                        spaces_after_bytes,
345                    });
346                }
347            }
348
349            // Try uppercase letter: (A)
350            if let Some(ch) = rest.chars().next()
351                && ch.is_ascii_uppercase()
352                && rest.len() > 1
353                && rest.chars().nth(1) == Some(')')
354            {
355                let after_marker = &rest[2..];
356                if after_marker.starts_with(' ')
357                    || after_marker.starts_with('\t')
358                    || after_marker.is_empty()
359                {
360                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
361                    return Some(ListMarkerMatch {
362                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
363                            letter: ch,
364                            style: ListDelimiter::Parens,
365                        }),
366                        marker_len: 3,
367                        spaces_after_cols,
368                        spaces_after_bytes,
369                    });
370                }
371            }
372        }
373    }
374
375    // Try decimal numbers: 1. or 1)
376    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
377    if digit_count > 0 && trimmed.len() > digit_count {
378        let number = &trimmed[..digit_count];
379        let delim = trimmed.chars().nth(digit_count);
380
381        let (style, marker_len) = match delim {
382            Some('.') => (ListDelimiter::Period, digit_count + 1),
383            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
384            _ => return None,
385        };
386        if style == ListDelimiter::RightParen && !config.extensions.fancy_lists {
387            return None;
388        }
389
390        let after_marker = &trimmed[marker_len..];
391        if after_marker.starts_with(' ')
392            || after_marker.starts_with('\t')
393            || after_marker.is_empty()
394        {
395            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
396            return Some(ListMarkerMatch {
397                marker: ListMarker::Ordered(OrderedMarker::Decimal {
398                    number: number.to_string(),
399                    style,
400                }),
401                marker_len,
402                spaces_after_cols,
403                spaces_after_bytes,
404            });
405        }
406    }
407
408    // Try fancy lists if enabled (non-parenthesized)
409    if config.extensions.fancy_lists {
410        // Try Roman numerals first, as they may overlap with letters
411
412        // Try lowercase Roman: i. or ii)
413        if let Some((numeral, len)) = try_parse_roman_numeral(trimmed, false)
414            && trimmed.len() > len
415            && let Some(delim) = trimmed.chars().nth(len)
416            && (delim == '.' || delim == ')')
417        {
418            let style = if delim == '.' {
419                ListDelimiter::Period
420            } else {
421                ListDelimiter::RightParen
422            };
423            let marker_len = len + 1;
424
425            let after_marker = &trimmed[marker_len..];
426            if after_marker.starts_with(' ')
427                || after_marker.starts_with('\t')
428                || after_marker.is_empty()
429            {
430                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
431                return Some(ListMarkerMatch {
432                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman { numeral, style }),
433                    marker_len,
434                    spaces_after_cols,
435                    spaces_after_bytes,
436                });
437            }
438        }
439
440        // Try uppercase Roman: I. or II)
441        if let Some((numeral, len)) = try_parse_roman_numeral(trimmed, true)
442            && trimmed.len() > len
443            && let Some(delim) = trimmed.chars().nth(len)
444            && (delim == '.' || delim == ')')
445        {
446            let style = if delim == '.' {
447                ListDelimiter::Period
448            } else {
449                ListDelimiter::RightParen
450            };
451            let marker_len = len + 1;
452
453            let after_marker = &trimmed[marker_len..];
454            if after_marker.starts_with(' ')
455                || after_marker.starts_with('\t')
456                || after_marker.is_empty()
457            {
458                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
459                return Some(ListMarkerMatch {
460                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman { numeral, style }),
461                    marker_len,
462                    spaces_after_cols,
463                    spaces_after_bytes,
464                });
465            }
466        }
467
468        // Try lowercase letter: a. or a)
469        if let Some(ch) = trimmed.chars().next()
470            && ch.is_ascii_lowercase()
471            && trimmed.len() > 1
472            && let Some(delim) = trimmed.chars().nth(1)
473            && (delim == '.' || delim == ')')
474        {
475            let style = if delim == '.' {
476                ListDelimiter::Period
477            } else {
478                ListDelimiter::RightParen
479            };
480            let marker_len = 2;
481
482            let after_marker = &trimmed[marker_len..];
483            if after_marker.starts_with(' ')
484                || after_marker.starts_with('\t')
485                || after_marker.is_empty()
486            {
487                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
488                return Some(ListMarkerMatch {
489                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
490                    marker_len,
491                    spaces_after_cols,
492                    spaces_after_bytes,
493                });
494            }
495        }
496
497        // Try uppercase letter: A. or A)
498        if let Some(ch) = trimmed.chars().next()
499            && ch.is_ascii_uppercase()
500            && trimmed.len() > 1
501            && let Some(delim) = trimmed.chars().nth(1)
502            && (delim == '.' || delim == ')')
503        {
504            let style = if delim == '.' {
505                ListDelimiter::Period
506            } else {
507                ListDelimiter::RightParen
508            };
509            let marker_len = 2;
510
511            let after_marker = &trimmed[marker_len..];
512            // Special rule: uppercase letter with period needs 2 spaces minimum
513            let min_spaces = if delim == '.' { 2 } else { 1 };
514            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
515
516            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
517                && spaces_after_cols >= min_spaces
518            {
519                return Some(ListMarkerMatch {
520                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
521                    marker_len,
522                    spaces_after_cols,
523                    spaces_after_bytes,
524                });
525            }
526        }
527    }
528
529    None
530}
531
532pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker) -> bool {
533    match (a, b) {
534        // All bullet list markers (-, *, +) are considered matching (Pandoc behavior)
535        (ListMarker::Bullet(_), ListMarker::Bullet(_)) => true,
536        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
537            true
538        }
539        (
540            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
541            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
542        ) => s1 == s2,
543        (
544            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
545            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
546        ) => s1 == s2,
547        (
548            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
549            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
550        ) => s1 == s2,
551        (
552            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
553            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
554        ) => s1 == s2,
555        (
556            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
557            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
558        ) => s1 == s2,
559        (
560            ListMarker::Ordered(OrderedMarker::Example { .. }),
561            ListMarker::Ordered(OrderedMarker::Example { .. }),
562        ) => true, // All example list items match each other
563        _ => false,
564    }
565}
566
567/// Emit a list item node to the builder (marker and whitespace only).
568/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
569/// added to the list item buffer for later inline parsing.
570pub(in crate::parser) fn emit_list_item(
571    builder: &mut GreenNodeBuilder<'static>,
572    item: &ListItemEmissionInput<'_>,
573) -> (usize, String) {
574    builder.start_node(SyntaxKind::LIST_ITEM.into());
575
576    // Emit leading indentation for lossless parsing
577    if item.indent_bytes > 0 {
578        builder.token(
579            SyntaxKind::WHITESPACE.into(),
580            &item.content[..item.indent_bytes],
581        );
582    }
583
584    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
585    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
586
587    if item.spaces_after_bytes > 0 {
588        let space_start = item.indent_bytes + item.marker_len;
589        let space_end = space_start + item.spaces_after_bytes;
590        if space_end <= item.content.len() {
591            builder.token(
592                SyntaxKind::WHITESPACE.into(),
593                &item.content[space_start..space_end],
594            );
595        }
596    }
597
598    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
599    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
600
601    // Extract text content to be buffered (instead of emitting it directly).
602    // If the item starts with a task checkbox, emit it as a dedicated token so it
603    // doesn't get parsed as a link.
604    let text_to_buffer = if content_start < item.content.len() {
605        let rest = &item.content[content_start..];
606        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
607            && rest
608                .as_bytes()
609                .get(3)
610                .is_some_and(|b| (*b as char).is_whitespace())
611        {
612            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
613            rest[3..].to_string()
614        } else {
615            rest.to_string()
616        }
617    } else {
618        String::new()
619    };
620
621    (content_col, text_to_buffer)
622}
623
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// Bullets are recognised with either a space or a tab after the marker.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` enabled, lowercase letters work with both `.` and `)`.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        // Lowercase alpha with period, then with right paren.
        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(&line, &config).is_some(),
                "{marker} should parse"
            );
        }
    }
}
666
/// Fancy-list markers match on flavour and delimiter style, not on their value.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::{Period, RightParen};
    use ListMarker::Ordered;
    use OrderedMarker::{LowerAlpha, LowerRoman};

    let lower_alpha = |letter, style| Ordered(LowerAlpha { letter, style });
    let lower_roman = |numeral: &str, style| {
        Ordered(LowerRoman {
            numeral: numeral.to_string(),
            style,
        })
    };

    // Same type and style should match
    let a_period = lower_alpha('a', Period);
    assert!(
        markers_match(&a_period, &lower_alpha('b', Period)),
        "a. and b. should match"
    );
    assert!(
        markers_match(&lower_roman("i", Period), &lower_roman("ii", Period)),
        "i. and ii. should match"
    );

    // Different styles should not match
    assert!(
        !markers_match(&a_period, &lower_alpha('a', RightParen)),
        "a. and a) should not match"
    );
}
710
/// Lowercase Roman numerals four through ten are recognised with `fancy_lists` enabled.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    for marker in ["iv.", "v.", "vi.", "vii.", "viii.", "ix.", "x."] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }
}
746
/// Example-list markers are recognised only when `example_lists` is enabled.
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    // Unlabeled example first, then labeled ones.
    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }

    // With the extension disabled the same marker must be rejected.
    let disabled_config = ParserOptions {
        extensions: crate::options::Extensions {
            example_lists: false,
            ..Default::default()
        },
        ..Default::default()
    };
    assert!(
        try_parse_list_marker("(@) item", &disabled_config).is_none(),
        "(@) should not parse when extension disabled"
    );
}
785
/// With deep ordered drift (indent 7), the item stays with the enclosing level
/// (base indent 8) rather than re-associating to the nearest lower level (6).
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 8,
        has_blank_between_items: false,
    });
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
    });
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 6,
        has_blank_between_items: false,
    });

    let level = find_matching_list_level(&containers, &lower_roman("iii"), 7);
    assert_eq!(level, Some(0));
}
825
/// When a list level has exactly the item's indent (6), that level is chosen.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 8,
        has_blank_between_items: false,
    });
    containers.push(Container::List {
        marker: lower_roman("ii"),
        base_indent_cols: 6,
        has_blank_between_items: false,
    });

    let level = find_matching_list_level(&containers, &lower_roman("iii"), 6);
    assert_eq!(level, Some(1));
}
859
/// A bullet whose only content is another bullet (`- *`) parses as a nested list
/// with an empty inner item, not as plain text.
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();

    // All three bullet characters are exercised as the nested marker.
    for (input, desc) in [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")] {
        let tree = parse(input, Some(config.clone()));

        // The returned tree is the DOCUMENT node itself.
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT > LIST
        let outer_list = tree
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
            .unwrap_or_else(|| panic!("{desc}: should have outer LIST node"));

        // LIST > LIST_ITEM
        let outer_item = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
            .unwrap_or_else(|| panic!("{desc}: should have outer LIST_ITEM"));

        // LIST_ITEM > LIST (a nested list, not PLAIN with TEXT)
        let nested_list = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
            .unwrap_or_else(|| {
                panic!(
                    "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                    outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
                )
            });

        // nested LIST > LIST_ITEM
        let nested_item = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
            .unwrap_or_else(|| panic!("{desc}: nested LIST should have LIST_ITEM"));

        // The nested item must be empty: no PLAIN child.
        let has_plain = nested_item
            .children()
            .any(|n| n.kind() == SyntaxKind::PLAIN);
        assert!(
            !has_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
917
918// Helper functions for list management in Parser
919
920/// Check if we're in any list.
921pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
922    containers
923        .stack
924        .iter()
925        .any(|c| matches!(c, Container::List { .. }))
926}
927
928/// Check if we're in a list inside a blockquote.
929pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
930    let mut seen_blockquote = false;
931    for c in &containers.stack {
932        if matches!(c, Container::BlockQuote { .. }) {
933            seen_blockquote = true;
934        }
935        if seen_blockquote && matches!(c, Container::List { .. }) {
936            return true;
937        }
938    }
939    false
940}
941
942/// Find matching list level for a marker with the given indent.
943pub(in crate::parser) fn find_matching_list_level(
944    containers: &ContainerStack,
945    marker: &ListMarker,
946    indent_cols: usize,
947) -> Option<usize> {
948    // Search from deepest (last) to shallowest (first)
949    // But for shallow items (0-3 indent), prefer matching at the closest base indent
950    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)
951
952    let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
953    let mut best_above_match: Option<(usize, usize)> = None; // (index, delta = base - indent), ordered deep only
954
955    for (i, c) in containers.stack.iter().enumerate().rev() {
956        if let Container::List {
957            marker: list_marker,
958            base_indent_cols,
959            ..
960        } = c
961            && markers_match(marker, list_marker)
962        {
963            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
964                // Deep indentation:
965                // - bullets stay directional to preserve nesting boundaries
966                // - ordered markers allow small symmetric drift to keep
967                //   marker-width-aligned lists (i./ii./iii.) at one level
968                match (marker, list_marker) {
969                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
970                        indent_cols.abs_diff(*base_indent_cols) <= 3
971                    }
972                    _ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
973                }
974            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
975                // One shallow, one deep:
976                // - ordered markers still allow symmetric drift so aligned roman
977                //   markers (e.g. 3/4/5 spaces for i./ii./iii.) stay at one level
978                // - bullets remain directional to preserve nesting boundaries
979                match (marker, list_marker) {
980                    (ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
981                        indent_cols.abs_diff(*base_indent_cols) <= 3
982                    }
983                    _ => false,
984                }
985            } else {
986                // Both at shallow indentation (0-3)
987                // Allow items within 3 spaces
988                indent_cols.abs_diff(*base_indent_cols) <= 3
989            };
990
991            if matches {
992                let distance = indent_cols.abs_diff(*base_indent_cols);
993                let base_leq_indent = *base_indent_cols <= indent_cols;
994
995                // For deep ordered lists, avoid "nearest below" re-association caused by
996                // formatter alignment shifts (e.g. i./ii./iii. becoming 6/7/8-space indents).
997                // Prefer matching the nearest enclosing level whose base indent is >= current.
998                if is_deep_ordered
999                    && matches!(
1000                        (marker, list_marker),
1001                        (ListMarker::Ordered(_), ListMarker::Ordered(_))
1002                    )
1003                    && *base_indent_cols >= indent_cols
1004                {
1005                    let delta = *base_indent_cols - indent_cols;
1006                    if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
1007                        best_above_match = Some((i, delta));
1008                    }
1009                }
1010
1011                if let Some((_, best_dist, best_base_leq)) = best_match {
1012                    if distance < best_dist
1013                        || (distance == best_dist && base_leq_indent && !best_base_leq)
1014                    {
1015                        best_match = Some((i, distance, base_leq_indent));
1016                    }
1017                } else {
1018                    best_match = Some((i, distance, base_leq_indent));
1019                }
1020
1021                // If we found an exact match, return immediately
1022                if distance == 0 {
1023                    return Some(i);
1024                }
1025            }
1026        }
1027    }
1028
1029    if let Some((index, _)) = best_above_match {
1030        return Some(index);
1031    }
1032
1033    best_match.map(|(i, _, _)| i)
1034}
1035
1036/// Start a nested list within an existing list item.
1037pub(in crate::parser) fn start_nested_list(
1038    containers: &mut ContainerStack,
1039    builder: &mut GreenNodeBuilder<'static>,
1040    marker: &ListMarker,
1041    item: &ListItemEmissionInput<'_>,
1042    indent_to_emit: Option<&str>,
1043) {
1044    // Emit the indent if needed
1045    if let Some(indent_str) = indent_to_emit {
1046        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
1047    }
1048
1049    // Start nested list
1050    builder.start_node(SyntaxKind::LIST.into());
1051    containers.push(Container::List {
1052        marker: marker.clone(),
1053        base_indent_cols: item.indent_cols,
1054        has_blank_between_items: false,
1055    });
1056
1057    // Add the nested list item
1058    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1059    let mut buffer = ListItemBuffer::new();
1060    if !text_to_buffer.is_empty() {
1061        buffer.push_text(text_to_buffer);
1062    }
1063    containers.push(Container::ListItem {
1064        content_col,
1065        buffer,
1066    });
1067}
1068
1069/// Checks if the content after a list marker is exactly another bullet marker.
1070/// Returns the nested bullet marker character if detected.
1071pub(in crate::parser) fn is_content_nested_bullet_marker(
1072    content: &str,
1073    marker_len: usize,
1074    spaces_after_bytes: usize,
1075) -> Option<char> {
1076    let (_, indent_bytes) = leading_indent(content);
1077    let content_start = indent_bytes + marker_len + spaces_after_bytes;
1078
1079    if content_start >= content.len() {
1080        return None;
1081    }
1082
1083    let remaining = &content[content_start..];
1084    let (text_part, _) = strip_newline(remaining);
1085    let trimmed = text_part.trim();
1086
1087    // Check if it's exactly one of the bullet marker characters
1088    if trimmed.len() == 1 {
1089        let ch = trimmed.chars().next().unwrap();
1090        if matches!(ch, '*' | '+' | '-') {
1091            return Some(ch);
1092        }
1093    }
1094
1095    None
1096}
1097
1098/// Add a list item that contains a nested empty list (for cases like `- *`).
1099/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
1100pub(in crate::parser) fn add_list_item_with_nested_empty_list(
1101    containers: &mut ContainerStack,
1102    builder: &mut GreenNodeBuilder<'static>,
1103    item: &ListItemEmissionInput<'_>,
1104    nested_marker: char,
1105) {
1106    // First, emit the outer list item (just marker + whitespace)
1107    builder.start_node(SyntaxKind::LIST_ITEM.into());
1108
1109    // Emit leading indentation for lossless parsing
1110    if item.indent_bytes > 0 {
1111        builder.token(
1112            SyntaxKind::WHITESPACE.into(),
1113            &item.content[..item.indent_bytes],
1114        );
1115    }
1116
1117    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1118    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1119
1120    if item.spaces_after_bytes > 0 {
1121        let space_start = item.indent_bytes + item.marker_len;
1122        let space_end = space_start + item.spaces_after_bytes;
1123        if space_end <= item.content.len() {
1124            builder.token(
1125                SyntaxKind::WHITESPACE.into(),
1126                &item.content[space_start..space_end],
1127            );
1128        }
1129    }
1130
1131    // Now start the nested list inside this item
1132    builder.start_node(SyntaxKind::LIST.into());
1133
1134    // Add empty list item to the nested list
1135    builder.start_node(SyntaxKind::LIST_ITEM.into());
1136    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1137
1138    // Extract and emit the newline from original content (lossless)
1139    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1140    if content_start < item.content.len() {
1141        let remaining = &item.content[content_start..];
1142        // Skip the nested marker character (1 byte) and get the newline
1143        if remaining.len() > 1 {
1144            let (_, newline_str) = strip_newline(&remaining[1..]);
1145            if !newline_str.is_empty() {
1146                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1147            }
1148        }
1149    }
1150
1151    builder.finish_node(); // Close nested LIST_ITEM
1152    builder.finish_node(); // Close nested LIST
1153
1154    // Push container for the outer list item
1155    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1156    containers.push(Container::ListItem {
1157        content_col,
1158        buffer: ListItemBuffer::new(),
1159    });
1160}
1161
1162/// Add a list item to the current list.
1163pub(in crate::parser) fn add_list_item(
1164    containers: &mut ContainerStack,
1165    builder: &mut GreenNodeBuilder<'static>,
1166    item: &ListItemEmissionInput<'_>,
1167) {
1168    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1169
1170    log::trace!(
1171        "add_list_item: content={:?}, text_to_buffer={:?}",
1172        item.content,
1173        text_to_buffer
1174    );
1175
1176    let mut buffer = ListItemBuffer::new();
1177    if !text_to_buffer.is_empty() {
1178        buffer.push_text(text_to_buffer);
1179    }
1180    containers.push(Container::ListItem {
1181        content_col,
1182        buffer,
1183    });
1184}