Skip to main content

panache_parser/parser/blocks/
lists.rs

1use crate::options::ParserOptions;
2use crate::syntax::SyntaxKind;
3use rowan::GreenNodeBuilder;
4
5use crate::parser::utils::container_stack::{Container, ContainerStack, leading_indent};
6use crate::parser::utils::helpers::strip_newline;
7use crate::parser::utils::list_item_buffer::ListItemBuffer;
8
9#[derive(Debug, Clone, PartialEq)]
10pub(crate) enum ListMarker {
11    Bullet(char),
12    Ordered(OrderedMarker),
13}
14
15#[derive(Debug, Clone, PartialEq)]
16pub(crate) enum OrderedMarker {
17    Decimal {
18        number: String,
19        style: ListDelimiter,
20    },
21    Hash,
22    LowerAlpha {
23        letter: char,
24        style: ListDelimiter,
25    },
26    UpperAlpha {
27        letter: char,
28        style: ListDelimiter,
29    },
30    LowerRoman {
31        numeral: String,
32        style: ListDelimiter,
33    },
34    UpperRoman {
35        numeral: String,
36        style: ListDelimiter,
37    },
38    Example {
39        label: Option<String>,
40    },
41}
42
/// How an ordered list marker is delimited.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
    /// Trailing period, as in `1.`.
    Period,
    /// Trailing right parenthesis, as in `1)`.
    RightParen,
    /// Enclosed in parentheses, as in `(1)`.
    Parens,
}
49
50#[derive(Debug, Clone, PartialEq)]
51pub(crate) struct ListMarkerMatch {
52    pub(crate) marker: ListMarker,
53    pub(crate) marker_len: usize,
54    pub(crate) spaces_after_cols: usize,
55    pub(crate) spaces_after_bytes: usize,
56}
57
58#[derive(Debug, Clone, Copy)]
59pub(in crate::parser) struct ListItemEmissionInput<'a> {
60    pub content: &'a str,
61    pub marker_len: usize,
62    pub spaces_after_cols: usize,
63    pub spaces_after_bytes: usize,
64    pub indent_cols: usize,
65    pub indent_bytes: usize,
66}
67
/// Recognise a Roman numeral at the start of `text`.
///
/// Only letters of the requested case are considered (`IVXLCDM` when
/// `uppercase` is true, `ivxlcdm` otherwise). On success returns the numeral
/// exactly as written together with its length; the numeral is pure ASCII, so
/// that length is both a char count and a byte count.
///
/// The validation is deliberately lightweight:
/// * a single-letter numeral must be `I`, `V` or `X`, so that `c.`, `d.`,
///   `l.` and `m.` stay available as alphabetic markers;
/// * the runs `IIII`, `XXXX`, `CCCC`, `VV`, `LL` and `DD` are rejected;
/// * a smaller digit directly before a larger one must form one of the
///   subtractive pairs `IV`, `IX`, `XL`, `XC`, `CD`, `CM`.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<(String, usize)> {
    // Numeric value of an uppercase Roman digit.
    fn digit_value(digit: u8) -> Option<u32> {
        Some(match digit {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => return None,
        })
    }

    // Runs that can never occur in a well-formed numeral.
    const FORBIDDEN_RUNS: [&str; 6] = ["IIII", "XXXX", "CCCC", "VV", "LL", "DD"];

    let alphabet: &[u8] = if uppercase { b"IVXLCDM" } else { b"ivxlcdm" };

    // Every accepted letter is ASCII, so scanning bytes is equivalent to
    // scanning chars and `len` is a valid slice boundary.
    let len = text.bytes().take_while(|b| alphabet.contains(b)).count();
    if len == 0 {
        return None;
    }

    let numeral = &text[..len];
    let upper = numeral.to_ascii_uppercase();
    let digits = upper.as_bytes();

    // A lone L, C, D or M is far more likely to be an alphabetic marker.
    if let &[only] = digits {
        if !matches!(only, b'I' | b'V' | b'X') {
            return None;
        }
    }

    if FORBIDDEN_RUNS.iter().any(|run| upper.contains(*run)) {
        return None;
    }

    // Whenever a digit is followed by a larger one, the pair must be one of
    // the six legal subtractive combinations.
    for pair in digits.windows(2) {
        let (left, right) = (pair[0], pair[1]);
        let rises = digit_value(left)? < digit_value(right)?;
        let legal_subtraction = matches!(
            (left, right),
            (b'I', b'V' | b'X') | (b'X', b'L' | b'C') | (b'C', b'D' | b'M')
        );
        if rises && !legal_subtraction {
            return None;
        }
    }

    Some((numeral.to_owned(), len))
}
157
158pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
159    let (_indent_cols, indent_bytes) = leading_indent(line);
160    let trimmed = &line[indent_bytes..];
161
162    // Try bullet markers (including task lists)
163    if let Some(ch) = trimmed.chars().next()
164        && matches!(ch, '*' | '+' | '-')
165    {
166        let after_marker = &trimmed[1..];
167
168        // Check for task list: [ ] or [x] or [X]
169        let trimmed_after = after_marker.trim_start();
170        let is_task = trimmed_after.starts_with('[')
171            && trimmed_after.len() >= 3
172            && matches!(
173                trimmed_after.chars().nth(1),
174                Some(' ') | Some('x') | Some('X')
175            )
176            && trimmed_after.chars().nth(2) == Some(']');
177
178        // Must be followed by whitespace (or be task list)
179        if after_marker.starts_with(' ')
180            || after_marker.starts_with('\t')
181            || after_marker.is_empty()
182            || is_task
183        {
184            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
185            return Some(ListMarkerMatch {
186                marker: ListMarker::Bullet(ch),
187                marker_len: 1,
188                spaces_after_cols,
189                spaces_after_bytes,
190            });
191        }
192    }
193
194    // Try ordered markers
195    if config.extensions.fancy_lists
196        && let Some(after_marker) = trimmed.strip_prefix("#.")
197        && (after_marker.starts_with(' ')
198            || after_marker.starts_with('\t')
199            || after_marker.is_empty())
200    {
201        let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
202        return Some(ListMarkerMatch {
203            marker: ListMarker::Ordered(OrderedMarker::Hash),
204            marker_len: 2,
205            spaces_after_cols,
206            spaces_after_bytes,
207        });
208    }
209
210    // Try example lists: (@) or (@label)
211    if config.extensions.example_lists
212        && let Some(rest) = trimmed.strip_prefix("(@")
213    {
214        // Check if it has a label or is just (@)
215        let label_end = rest
216            .chars()
217            .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
218            .count();
219
220        // Must be followed by ')'
221        if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
222            let label = if label_end > 0 {
223                Some(rest[..label_end].to_string())
224            } else {
225                None
226            };
227
228            let after_marker = &rest[label_end + 1..];
229            if after_marker.starts_with(' ')
230                || after_marker.starts_with('\t')
231                || after_marker.is_empty()
232            {
233                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
234                let marker_len = 2 + label_end + 1; // "(@" + label + ")"
235                return Some(ListMarkerMatch {
236                    marker: ListMarker::Ordered(OrderedMarker::Example { label }),
237                    marker_len,
238                    spaces_after_cols,
239                    spaces_after_bytes,
240                });
241            }
242        }
243    }
244
245    // Try parenthesized markers: (2), (a), (ii)
246    if let Some(rest) = trimmed.strip_prefix('(') {
247        if config.extensions.fancy_lists {
248            // Try decimal: (2)
249            let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
250            if digit_count > 0
251                && rest.len() > digit_count
252                && rest.chars().nth(digit_count) == Some(')')
253            {
254                let number = &rest[..digit_count];
255                let after_marker = &rest[digit_count + 1..];
256                if after_marker.starts_with(' ')
257                    || after_marker.starts_with('\t')
258                    || after_marker.is_empty()
259                {
260                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
261                    let marker_len = 2 + digit_count;
262                    return Some(ListMarkerMatch {
263                        marker: ListMarker::Ordered(OrderedMarker::Decimal {
264                            number: number.to_string(),
265                            style: ListDelimiter::Parens,
266                        }),
267                        marker_len,
268                        spaces_after_cols,
269                        spaces_after_bytes,
270                    });
271                }
272            }
273        }
274
275        // Try fancy lists if enabled (parenthesized markers)
276        if config.extensions.fancy_lists {
277            // Try Roman numerals first (to avoid ambiguity with letters i, v, x, etc.)
278
279            // Try lowercase Roman: (ii)
280            if let Some((numeral, len)) = try_parse_roman_numeral(rest, false)
281                && rest.len() > len
282                && rest.chars().nth(len) == Some(')')
283            {
284                let after_marker = &rest[len + 1..];
285                if after_marker.starts_with(' ')
286                    || after_marker.starts_with('\t')
287                    || after_marker.is_empty()
288                {
289                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
290                    return Some(ListMarkerMatch {
291                        marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
292                            numeral,
293                            style: ListDelimiter::Parens,
294                        }),
295                        marker_len: len + 2,
296                        spaces_after_cols,
297                        spaces_after_bytes,
298                    });
299                }
300            }
301
302            // Try uppercase Roman: (II)
303            if let Some((numeral, len)) = try_parse_roman_numeral(rest, true)
304                && rest.len() > len
305                && rest.chars().nth(len) == Some(')')
306            {
307                let after_marker = &rest[len + 1..];
308                if after_marker.starts_with(' ')
309                    || after_marker.starts_with('\t')
310                    || after_marker.is_empty()
311                {
312                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
313                    return Some(ListMarkerMatch {
314                        marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
315                            numeral,
316                            style: ListDelimiter::Parens,
317                        }),
318                        marker_len: len + 2,
319                        spaces_after_cols,
320                        spaces_after_bytes,
321                    });
322                }
323            }
324
325            // Try lowercase letter: (a)
326            if let Some(ch) = rest.chars().next()
327                && ch.is_ascii_lowercase()
328                && rest.len() > 1
329                && rest.chars().nth(1) == Some(')')
330            {
331                let after_marker = &rest[2..];
332                if after_marker.starts_with(' ')
333                    || after_marker.starts_with('\t')
334                    || after_marker.is_empty()
335                {
336                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
337                    return Some(ListMarkerMatch {
338                        marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
339                            letter: ch,
340                            style: ListDelimiter::Parens,
341                        }),
342                        marker_len: 3,
343                        spaces_after_cols,
344                        spaces_after_bytes,
345                    });
346                }
347            }
348
349            // Try uppercase letter: (A)
350            if let Some(ch) = rest.chars().next()
351                && ch.is_ascii_uppercase()
352                && rest.len() > 1
353                && rest.chars().nth(1) == Some(')')
354            {
355                let after_marker = &rest[2..];
356                if after_marker.starts_with(' ')
357                    || after_marker.starts_with('\t')
358                    || after_marker.is_empty()
359                {
360                    let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
361                    return Some(ListMarkerMatch {
362                        marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
363                            letter: ch,
364                            style: ListDelimiter::Parens,
365                        }),
366                        marker_len: 3,
367                        spaces_after_cols,
368                        spaces_after_bytes,
369                    });
370                }
371            }
372        }
373    }
374
375    // Try decimal numbers: 1. or 1)
376    let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
377    if digit_count > 0 && trimmed.len() > digit_count {
378        let number = &trimmed[..digit_count];
379        let delim = trimmed.chars().nth(digit_count);
380
381        let (style, marker_len) = match delim {
382            Some('.') => (ListDelimiter::Period, digit_count + 1),
383            Some(')') => (ListDelimiter::RightParen, digit_count + 1),
384            _ => return None,
385        };
386        if style == ListDelimiter::RightParen && !config.extensions.fancy_lists {
387            return None;
388        }
389
390        let after_marker = &trimmed[marker_len..];
391        if after_marker.starts_with(' ')
392            || after_marker.starts_with('\t')
393            || after_marker.is_empty()
394        {
395            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
396            return Some(ListMarkerMatch {
397                marker: ListMarker::Ordered(OrderedMarker::Decimal {
398                    number: number.to_string(),
399                    style,
400                }),
401                marker_len,
402                spaces_after_cols,
403                spaces_after_bytes,
404            });
405        }
406    }
407
408    // Try fancy lists if enabled (non-parenthesized)
409    if config.extensions.fancy_lists {
410        // Try Roman numerals first, as they may overlap with letters
411
412        // Try lowercase Roman: i. or ii)
413        if let Some((numeral, len)) = try_parse_roman_numeral(trimmed, false)
414            && trimmed.len() > len
415            && let Some(delim) = trimmed.chars().nth(len)
416            && (delim == '.' || delim == ')')
417        {
418            let style = if delim == '.' {
419                ListDelimiter::Period
420            } else {
421                ListDelimiter::RightParen
422            };
423            let marker_len = len + 1;
424
425            let after_marker = &trimmed[marker_len..];
426            if after_marker.starts_with(' ')
427                || after_marker.starts_with('\t')
428                || after_marker.is_empty()
429            {
430                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
431                return Some(ListMarkerMatch {
432                    marker: ListMarker::Ordered(OrderedMarker::LowerRoman { numeral, style }),
433                    marker_len,
434                    spaces_after_cols,
435                    spaces_after_bytes,
436                });
437            }
438        }
439
440        // Try uppercase Roman: I. or II)
441        if let Some((numeral, len)) = try_parse_roman_numeral(trimmed, true)
442            && trimmed.len() > len
443            && let Some(delim) = trimmed.chars().nth(len)
444            && (delim == '.' || delim == ')')
445        {
446            let style = if delim == '.' {
447                ListDelimiter::Period
448            } else {
449                ListDelimiter::RightParen
450            };
451            let marker_len = len + 1;
452
453            let after_marker = &trimmed[marker_len..];
454            if after_marker.starts_with(' ')
455                || after_marker.starts_with('\t')
456                || after_marker.is_empty()
457            {
458                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
459                return Some(ListMarkerMatch {
460                    marker: ListMarker::Ordered(OrderedMarker::UpperRoman { numeral, style }),
461                    marker_len,
462                    spaces_after_cols,
463                    spaces_after_bytes,
464                });
465            }
466        }
467
468        // Try lowercase letter: a. or a)
469        if let Some(ch) = trimmed.chars().next()
470            && ch.is_ascii_lowercase()
471            && trimmed.len() > 1
472            && let Some(delim) = trimmed.chars().nth(1)
473            && (delim == '.' || delim == ')')
474        {
475            let style = if delim == '.' {
476                ListDelimiter::Period
477            } else {
478                ListDelimiter::RightParen
479            };
480            let marker_len = 2;
481
482            let after_marker = &trimmed[marker_len..];
483            if after_marker.starts_with(' ')
484                || after_marker.starts_with('\t')
485                || after_marker.is_empty()
486            {
487                let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
488                return Some(ListMarkerMatch {
489                    marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
490                    marker_len,
491                    spaces_after_cols,
492                    spaces_after_bytes,
493                });
494            }
495        }
496
497        // Try uppercase letter: A. or A)
498        if let Some(ch) = trimmed.chars().next()
499            && ch.is_ascii_uppercase()
500            && trimmed.len() > 1
501            && let Some(delim) = trimmed.chars().nth(1)
502            && (delim == '.' || delim == ')')
503        {
504            let style = if delim == '.' {
505                ListDelimiter::Period
506            } else {
507                ListDelimiter::RightParen
508            };
509            let marker_len = 2;
510
511            let after_marker = &trimmed[marker_len..];
512            // Special rule: uppercase letter with period needs 2 spaces minimum
513            let min_spaces = if delim == '.' { 2 } else { 1 };
514            let (spaces_after_cols, spaces_after_bytes) = leading_indent(after_marker);
515
516            if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
517                && spaces_after_cols >= min_spaces
518            {
519                return Some(ListMarkerMatch {
520                    marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
521                    marker_len,
522                    spaces_after_cols,
523                    spaces_after_bytes,
524                });
525            }
526        }
527    }
528
529    None
530}
531
532pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker) -> bool {
533    match (a, b) {
534        // All bullet list markers (-, *, +) are considered matching (Pandoc behavior)
535        (ListMarker::Bullet(_), ListMarker::Bullet(_)) => true,
536        (ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
537            true
538        }
539        (
540            ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
541            ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
542        ) => s1 == s2,
543        (
544            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
545            ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
546        ) => s1 == s2,
547        (
548            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
549            ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
550        ) => s1 == s2,
551        (
552            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
553            ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
554        ) => s1 == s2,
555        (
556            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
557            ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
558        ) => s1 == s2,
559        (
560            ListMarker::Ordered(OrderedMarker::Example { .. }),
561            ListMarker::Ordered(OrderedMarker::Example { .. }),
562        ) => true, // All example list items match each other
563        _ => false,
564    }
565}
566
567/// Emit a list item node to the builder (marker and whitespace only).
568/// Returns (content_col, text_to_buffer) where text_to_buffer is the content that should be
569/// added to the list item buffer for later inline parsing.
570pub(in crate::parser) fn emit_list_item(
571    builder: &mut GreenNodeBuilder<'static>,
572    item: &ListItemEmissionInput<'_>,
573) -> (usize, String) {
574    builder.start_node(SyntaxKind::LIST_ITEM.into());
575
576    // Emit leading indentation for lossless parsing
577    if item.indent_bytes > 0 {
578        builder.token(
579            SyntaxKind::WHITESPACE.into(),
580            &item.content[..item.indent_bytes],
581        );
582    }
583
584    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
585    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
586
587    if item.spaces_after_bytes > 0 {
588        let space_start = item.indent_bytes + item.marker_len;
589        let space_end = space_start + item.spaces_after_bytes;
590        if space_end <= item.content.len() {
591            builder.token(
592                SyntaxKind::WHITESPACE.into(),
593                &item.content[space_start..space_end],
594            );
595        }
596    }
597
598    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
599    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
600
601    // Extract text content to be buffered (instead of emitting it directly).
602    // If the item starts with a task checkbox, emit it as a dedicated token so it
603    // doesn't get parsed as a link.
604    let text_to_buffer = if content_start < item.content.len() {
605        let rest = &item.content[content_start..];
606        if (rest.starts_with("[ ]") || rest.starts_with("[x]") || rest.starts_with("[X]"))
607            && rest
608                .as_bytes()
609                .get(3)
610                .is_some_and(|b| (*b as char).is_whitespace())
611        {
612            builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
613            rest[3..].to_string()
614        } else {
615            rest.to_string()
616        }
617    } else {
618        String::new()
619    };
620
621    (content_col, text_to_buffer)
622}
623
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// `*` is a bullet whether a space or a tab follows it.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` on, lowercase letters work with both `.` and `)`.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        // Lowercase alpha with period, then with right paren.
        for marker in ["a.", "b.", "c.", "a)", "b)"] {
            let line = format!("{marker} item");
            assert!(
                try_parse_list_marker(&line, &config).is_some(),
                "{marker} should parse"
            );
        }
    }
}
666
/// Fancy markers match on flavour and delimiter style, not on their value.
#[test]
fn markers_match_fancy_lists() {
    use ListDelimiter::*;
    use ListMarker::*;
    use OrderedMarker::*;

    let alpha = |letter: char, style: ListDelimiter| Ordered(LowerAlpha { letter, style });
    let roman = |numeral: &str, style: ListDelimiter| {
        Ordered(LowerRoman {
            numeral: numeral.to_string(),
            style,
        })
    };

    // Same type and style should match
    let a_period = alpha('a', Period);
    assert!(
        markers_match(&a_period, &alpha('b', Period)),
        "a. and b. should match"
    );
    assert!(
        markers_match(&roman("i", Period), &roman("ii", Period)),
        "i. and ii. should match"
    );

    // Different styles should not match
    assert!(
        !markers_match(&a_period, &alpha('a', RightParen)),
        "a. and a) should not match"
    );
}
710
/// Lowercase Roman numerals four through ten are recognised as list markers.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    for marker in ["iv.", "v.", "vi.", "vii.", "viii.", "ix.", "x."] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }
}
746
/// Example markers parse when `example_lists` is on and are rejected when off.
#[test]
fn detects_example_list_markers() {
    use crate::options::Extensions;

    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    // The unlabeled form first, then labels with letters, `_`, `-` and digits.
    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }

    // Same input with the extension switched off.
    let disabled_config = ParserOptions {
        extensions: Extensions {
            example_lists: false,
            ..Default::default()
        },
        ..Default::default()
    };
    let parsed = try_parse_list_marker("(@) item", &disabled_config);
    assert!(
        parsed.is_none(),
        "(@) should not parse when extension disabled"
    );
}
785
/// A bullet whose only content is another bullet (`- *`) must parse as a list
/// nested in a list item, with the inner item left empty.
#[test]
fn parses_nested_bullet_list_from_single_marker() {
    use crate::parse;
    use crate::syntax::SyntaxKind;

    let config = ParserOptions::default();

    // Every inner bullet character nested under an outer `-`.
    let cases = [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")];

    for (input, desc) in cases {
        // `parse` hands back the DOCUMENT node itself.
        let tree = parse(input, Some(config.clone()));
        assert_eq!(
            tree.kind(),
            SyntaxKind::DOCUMENT,
            "{desc}: root should be DOCUMENT"
        );

        // DOCUMENT > LIST
        let Some(outer_list) = tree.children().find(|n| n.kind() == SyntaxKind::LIST) else {
            panic!("{desc}: should have outer LIST node");
        };

        // LIST > LIST_ITEM
        let Some(outer_item) = outer_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: should have outer LIST_ITEM");
        };

        // LIST_ITEM > LIST: the inner marker must not become PLAIN text.
        let Some(nested_list) = outer_item
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST)
        else {
            panic!(
                "{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
                outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
            );
        };

        // nested LIST > LIST_ITEM
        let Some(nested_item) = nested_list
            .children()
            .find(|n| n.kind() == SyntaxKind::LIST_ITEM)
        else {
            panic!("{desc}: nested LIST should have LIST_ITEM");
        };

        // The innermost item carries no content at all.
        let no_plain = nested_item
            .children()
            .all(|n| n.kind() != SyntaxKind::PLAIN);
        assert!(
            no_plain,
            "{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
        );
    }
}
843
844// Helper functions for list management in Parser
845
846/// Check if we're in any list.
847pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
848    containers
849        .stack
850        .iter()
851        .any(|c| matches!(c, Container::List { .. }))
852}
853
854/// Check if we're in a list inside a blockquote.
855pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
856    let mut seen_blockquote = false;
857    for c in &containers.stack {
858        if matches!(c, Container::BlockQuote { .. }) {
859            seen_blockquote = true;
860        }
861        if seen_blockquote && matches!(c, Container::List { .. }) {
862            return true;
863        }
864    }
865    false
866}
867
868/// Find matching list level for a marker with the given indent.
869pub(in crate::parser) fn find_matching_list_level(
870    containers: &ContainerStack,
871    marker: &ListMarker,
872    indent_cols: usize,
873) -> Option<usize> {
874    // Search from deepest (last) to shallowest (first)
875    // But for shallow items (0-3 indent), prefer matching at the closest base indent
876    let mut best_match: Option<(usize, usize, bool)> = None; // (index, distance, base_leq_indent)
877
878    for (i, c) in containers.stack.iter().enumerate().rev() {
879        if let Container::List {
880            marker: list_marker,
881            base_indent_cols,
882            ..
883        } = c
884            && markers_match(marker, list_marker)
885        {
886            let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
887                // Both deeply indented - require close match
888                indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3
889            } else if indent_cols >= 4 || *base_indent_cols >= 4 {
890                // One shallow, one deep - no match
891                false
892            } else {
893                // Both at shallow indentation (0-3)
894                // Allow items within 3 spaces
895                indent_cols.abs_diff(*base_indent_cols) <= 3
896            };
897
898            if matches {
899                let distance = indent_cols.abs_diff(*base_indent_cols);
900                let base_leq_indent = *base_indent_cols <= indent_cols;
901                if let Some((_, best_dist, best_base_leq)) = best_match {
902                    if distance < best_dist
903                        || (distance == best_dist && base_leq_indent && !best_base_leq)
904                    {
905                        best_match = Some((i, distance, base_leq_indent));
906                    }
907                } else {
908                    best_match = Some((i, distance, base_leq_indent));
909                }
910
911                // If we found an exact match, return immediately
912                if distance == 0 {
913                    return Some(i);
914                }
915            }
916        }
917    }
918
919    best_match.map(|(i, _, _)| i)
920}
921
922/// Start a nested list within an existing list item.
923pub(in crate::parser) fn start_nested_list(
924    containers: &mut ContainerStack,
925    builder: &mut GreenNodeBuilder<'static>,
926    marker: &ListMarker,
927    item: &ListItemEmissionInput<'_>,
928    indent_to_emit: Option<&str>,
929) {
930    // Emit the indent if needed
931    if let Some(indent_str) = indent_to_emit {
932        builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
933    }
934
935    // Start nested list
936    builder.start_node(SyntaxKind::LIST.into());
937    containers.push(Container::List {
938        marker: marker.clone(),
939        base_indent_cols: item.indent_cols,
940        has_blank_between_items: false,
941    });
942
943    // Add the nested list item
944    let (content_col, text_to_buffer) = emit_list_item(builder, item);
945    let mut buffer = ListItemBuffer::new();
946    if !text_to_buffer.is_empty() {
947        buffer.push_text(text_to_buffer);
948    }
949    containers.push(Container::ListItem {
950        content_col,
951        buffer,
952    });
953}
954
955/// Checks if the content after a list marker is exactly another bullet marker.
956/// Returns the nested bullet marker character if detected.
957pub(in crate::parser) fn is_content_nested_bullet_marker(
958    content: &str,
959    marker_len: usize,
960    spaces_after_bytes: usize,
961) -> Option<char> {
962    let (_, indent_bytes) = leading_indent(content);
963    let content_start = indent_bytes + marker_len + spaces_after_bytes;
964
965    if content_start >= content.len() {
966        return None;
967    }
968
969    let remaining = &content[content_start..];
970    let (text_part, _) = strip_newline(remaining);
971    let trimmed = text_part.trim();
972
973    // Check if it's exactly one of the bullet marker characters
974    if trimmed.len() == 1 {
975        let ch = trimmed.chars().next().unwrap();
976        if matches!(ch, '*' | '+' | '-') {
977            return Some(ch);
978        }
979    }
980
981    None
982}
983
984/// Add a list item that contains a nested empty list (for cases like `- *`).
985/// This creates: LIST_ITEM (outer) -> LIST (nested) -> LIST_ITEM (empty inner)
986pub(in crate::parser) fn add_list_item_with_nested_empty_list(
987    containers: &mut ContainerStack,
988    builder: &mut GreenNodeBuilder<'static>,
989    item: &ListItemEmissionInput<'_>,
990    nested_marker: char,
991) {
992    // First, emit the outer list item (just marker + whitespace)
993    builder.start_node(SyntaxKind::LIST_ITEM.into());
994
995    // Emit leading indentation for lossless parsing
996    if item.indent_bytes > 0 {
997        builder.token(
998            SyntaxKind::WHITESPACE.into(),
999            &item.content[..item.indent_bytes],
1000        );
1001    }
1002
1003    let marker_text = &item.content[item.indent_bytes..item.indent_bytes + item.marker_len];
1004    builder.token(SyntaxKind::LIST_MARKER.into(), marker_text);
1005
1006    if item.spaces_after_bytes > 0 {
1007        let space_start = item.indent_bytes + item.marker_len;
1008        let space_end = space_start + item.spaces_after_bytes;
1009        if space_end <= item.content.len() {
1010            builder.token(
1011                SyntaxKind::WHITESPACE.into(),
1012                &item.content[space_start..space_end],
1013            );
1014        }
1015    }
1016
1017    // Now start the nested list inside this item
1018    builder.start_node(SyntaxKind::LIST.into());
1019
1020    // Add empty list item to the nested list
1021    builder.start_node(SyntaxKind::LIST_ITEM.into());
1022    builder.token(SyntaxKind::LIST_MARKER.into(), &nested_marker.to_string());
1023
1024    // Extract and emit the newline from original content (lossless)
1025    let content_start = item.indent_bytes + item.marker_len + item.spaces_after_bytes;
1026    if content_start < item.content.len() {
1027        let remaining = &item.content[content_start..];
1028        // Skip the nested marker character (1 byte) and get the newline
1029        if remaining.len() > 1 {
1030            let (_, newline_str) = strip_newline(&remaining[1..]);
1031            if !newline_str.is_empty() {
1032                builder.token(SyntaxKind::NEWLINE.into(), newline_str);
1033            }
1034        }
1035    }
1036
1037    builder.finish_node(); // Close nested LIST_ITEM
1038    builder.finish_node(); // Close nested LIST
1039
1040    // Push container for the outer list item
1041    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
1042    containers.push(Container::ListItem {
1043        content_col,
1044        buffer: ListItemBuffer::new(),
1045    });
1046}
1047
1048/// Add a list item to the current list.
1049pub(in crate::parser) fn add_list_item(
1050    containers: &mut ContainerStack,
1051    builder: &mut GreenNodeBuilder<'static>,
1052    item: &ListItemEmissionInput<'_>,
1053) {
1054    let (content_col, text_to_buffer) = emit_list_item(builder, item);
1055
1056    log::debug!(
1057        "add_list_item: content={:?}, text_to_buffer={:?}",
1058        item.content,
1059        text_to_buffer
1060    );
1061
1062    let mut buffer = ListItemBuffer::new();
1063    if !text_to_buffer.is_empty() {
1064        buffer.push_text(text_to_buffer);
1065    }
1066    containers.push(Container::ListItem {
1067        content_col,
1068        buffer,
1069    });
1070}