Skip to main content

rumdl_lib/rules/
list_utils.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
4// Optimized list detection patterns with anchors and non-capturing groups
5static UNORDERED_LIST_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-])(\s+)").unwrap());
6static ORDERED_LIST_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(\d+\.)(\s+)").unwrap());
7
8// Patterns for lists without proper spacing
9// [^\s*] after the marker excludes emphasis (e.g., **bold**) since * is not matched
10static UNORDERED_LIST_NO_SPACE_PATTERN: LazyLock<Regex> =
11    LazyLock::new(|| Regex::new(r"^(\s*)([*+-])([^\s*])").unwrap());
12static ORDERED_LIST_NO_SPACE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(\d+\.)([^\s])").unwrap());
13
14// Patterns for lists with multiple spaces
15static UNORDERED_LIST_MULTIPLE_SPACE_PATTERN: LazyLock<Regex> =
16    LazyLock::new(|| Regex::new(r"^(\s*)([*+-])(\s{2,})").unwrap());
17static ORDERED_LIST_MULTIPLE_SPACE_PATTERN: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r"^(\s*)(\d+\.)(\s{2,})").unwrap());
19
20// Regex to capture list markers and the spaces *after* them
21pub static LIST_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([-*+]|\d+\.)(\s*)").unwrap());
22
/// The kind of marker that introduces a list item.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so the type can be used
/// as a set/map key, matching the equality guarantees `ListType` already offers.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ListMarkerType {
    /// Unordered item introduced by `*`.
    Asterisk,
    /// Unordered item introduced by `+`.
    Plus,
    /// Unordered item introduced by `-`.
    Minus,
    /// Ordered item introduced by digits followed by `.`.
    Ordered,
}
31
/// A single parsed list item line, as produced by `ListUtils::parse_list_item`.
#[derive(Debug, Clone)]
pub struct ListItem {
    /// Width of the leading whitespace as computed by
    /// `ListUtils::calculate_indentation` (each tab counts as 4).
    pub indentation: usize,
    /// Which kind of marker introduced the item.
    pub marker_type: ListMarkerType,
    /// The marker text itself: `*`, `+`, `-`, or digits followed by `.`.
    pub marker: String,
    /// Everything on the line after the marker and the whitespace following it.
    pub content: String,
    /// Byte length of the whitespace run between the marker and the content.
    pub spaces_after_marker: usize,
}
41
/// Namespace type for list detection, parsing and spacing-fix helpers that
/// operate on single lines of a Markdown document.
///
/// The type carries no state; every helper is an associated function invoked
/// as `ListUtils::name(..)`.
pub struct ListUtils;
44
45impl ListUtils {
46    /// Calculate indentation level, counting tabs as 4 spaces per CommonMark spec
47    pub fn calculate_indentation(s: &str) -> usize {
48        s.chars()
49            .take_while(|c| c.is_whitespace())
50            .map(|c| if c == '\t' { 4 } else { 1 })
51            .sum()
52    }
53
54    /// Check if a line is a list item
55    pub fn is_list_item(line: &str) -> bool {
56        // Fast path for common cases
57        if line.is_empty() {
58            return false;
59        }
60
61        let trimmed = line.trim_start();
62        if trimmed.is_empty() {
63            return false;
64        }
65
66        // Quick literal check for common list markers
67        let Some(first_char) = trimmed.chars().next() else {
68            return false;
69        };
70        match first_char {
71            '*' | '+' | '-' => {
72                if trimmed.len() > 1 {
73                    let mut chars = trimmed.chars();
74                    chars.next(); // Skip first char
75                    if let Some(second_char) = chars.next() {
76                        return second_char.is_whitespace();
77                    }
78                }
79                false
80            }
81            '0'..='9' => {
82                // Check for ordered list pattern using a literal search first
83                let dot_pos = trimmed.find('.');
84                if let Some(pos) = dot_pos
85                    && pos > 0
86                    && pos < trimmed.len() - 1
87                {
88                    let after_dot = &trimmed[pos + 1..];
89                    return after_dot.starts_with(' ');
90                }
91                false
92            }
93            _ => false,
94        }
95    }
96
97    /// Check if a line is an unordered list item
98    pub fn is_unordered_list_item(line: &str) -> bool {
99        // Fast path for common cases
100        if line.is_empty() {
101            return false;
102        }
103
104        let trimmed = line.trim_start();
105        if trimmed.is_empty() {
106            return false;
107        }
108
109        // Quick literal check for unordered list markers
110        let Some(first_char) = trimmed.chars().next() else {
111            return false;
112        };
113        if (first_char == '*' || first_char == '+' || first_char == '-')
114            && trimmed.len() > 1
115            && let Some(second_char) = trimmed.chars().nth(1)
116        {
117            return second_char.is_whitespace();
118        }
119
120        false
121    }
122
123    /// Check if a line is an ordered list item
124    pub fn is_ordered_list_item(line: &str) -> bool {
125        // Fast path for common cases
126        if line.is_empty() {
127            return false;
128        }
129
130        let trimmed = line.trim_start();
131        if trimmed.is_empty() {
132            return false;
133        }
134
135        let Some(first_char) = trimmed.chars().next() else {
136            return false;
137        };
138
139        if !first_char.is_ascii_digit() {
140            return false;
141        }
142
143        // Check for ordered list pattern using a literal search
144        let dot_pos = trimmed.find('.');
145        if let Some(pos) = dot_pos
146            && pos > 0
147            && pos < trimmed.len() - 1
148        {
149            let after_dot = &trimmed[pos + 1..];
150            return after_dot.starts_with(' ');
151        }
152
153        false
154    }
155
156    /// Check if a line is a list item without proper spacing after the marker
157    pub fn is_list_item_without_space(line: &str) -> bool {
158        // Skip lines that start with double asterisks (bold text)
159        if line.trim_start().starts_with("**") {
160            return false;
161        }
162
163        // Skip lines that have bold/emphasis markers (typically table cells with bold text)
164        if line.trim_start().contains("**") || line.trim_start().contains("__") {
165            return false;
166        }
167
168        // Skip lines that are part of a Markdown table
169        if crate::utils::skip_context::is_table_line(line) {
170            return false;
171        }
172
173        // Skip lines that are horizontal rules or table delimiter rows
174        let trimmed = line.trim();
175        if !trimmed.is_empty() {
176            // Check for horizontal rules (only dashes and whitespace)
177            if trimmed.chars().all(|c| c == '-' || c.is_whitespace()) {
178                return false;
179            }
180
181            // Check for table delimiter rows without pipes (e.g., in cases where pipes are optional)
182            // These have dashes and possibly colons for alignment
183            if trimmed.contains('-') && trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) {
184                return false;
185            }
186        }
187
188        // Skip lines that are part of emphasis/bold text
189        if line.trim_start().matches('*').count() >= 2 {
190            return false;
191        }
192
193        UNORDERED_LIST_NO_SPACE_PATTERN.is_match(line) || ORDERED_LIST_NO_SPACE_PATTERN.is_match(line)
194    }
195
196    /// Check if a line is a list item with multiple spaces after the marker
197    pub fn is_list_item_with_multiple_spaces(line: &str) -> bool {
198        UNORDERED_LIST_MULTIPLE_SPACE_PATTERN.is_match(line) || ORDERED_LIST_MULTIPLE_SPACE_PATTERN.is_match(line)
199    }
200
201    /// Parse a line as a list item
202    pub fn parse_list_item(line: &str) -> Option<ListItem> {
203        // First try to match unordered list pattern
204        if let Some(captures) = UNORDERED_LIST_PATTERN.captures(line) {
205            let indentation = captures.get(1).map_or(0, |m| Self::calculate_indentation(m.as_str()));
206            let marker = captures.get(2).unwrap().as_str();
207            let spaces = captures.get(3).map_or(0, |m| m.as_str().len());
208            let raw_indentation = captures.get(1).map_or(0, |m| m.as_str().len());
209            let content_start = raw_indentation + marker.len() + spaces;
210            let content = if content_start < line.len() {
211                line[content_start..].to_string()
212            } else {
213                String::new()
214            };
215
216            let marker_type = match marker {
217                "*" => ListMarkerType::Asterisk,
218                "+" => ListMarkerType::Plus,
219                "-" => ListMarkerType::Minus,
220                _ => unreachable!("UNORDERED_LIST_PATTERN regex guarantees marker is [*+-]"),
221            };
222
223            return Some(ListItem {
224                indentation,
225                marker_type,
226                marker: marker.to_string(),
227                content,
228                spaces_after_marker: spaces,
229            });
230        }
231
232        // Then try to match ordered list pattern
233        if let Some(captures) = ORDERED_LIST_PATTERN.captures(line) {
234            let indentation = captures.get(1).map_or(0, |m| Self::calculate_indentation(m.as_str()));
235            let marker = captures.get(2).unwrap().as_str();
236            let spaces = captures.get(3).map_or(0, |m| m.as_str().len());
237            let raw_indentation = captures.get(1).map_or(0, |m| m.as_str().len());
238            let content_start = raw_indentation + marker.len() + spaces;
239            let content = if content_start < line.len() {
240                line[content_start..].to_string()
241            } else {
242                String::new()
243            };
244
245            return Some(ListItem {
246                indentation,
247                marker_type: ListMarkerType::Ordered,
248                marker: marker.to_string(),
249                content,
250                spaces_after_marker: spaces,
251            });
252        }
253
254        None
255    }
256
257    /// Check if a line is a continuation of a list item
258    pub fn is_list_continuation(line: &str, prev_list_item: &ListItem) -> bool {
259        if line.trim().is_empty() {
260            return false;
261        }
262
263        // Calculate indentation level properly (tabs = 4 spaces)
264        let indentation = Self::calculate_indentation(line);
265
266        // Continuation should be indented at least as much as the content of the previous item
267        let min_indent = prev_list_item.indentation + prev_list_item.marker.len() + prev_list_item.spaces_after_marker;
268        indentation >= min_indent && !Self::is_list_item(line)
269    }
270
271    /// Fix a list item without proper spacing
272    pub fn fix_list_item_without_space(line: &str) -> String {
273        // Handle unordered list items
274        if let Some(captures) = UNORDERED_LIST_NO_SPACE_PATTERN.captures(line) {
275            let indentation = captures.get(1).map_or("", |m| m.as_str());
276            // Group 2 is the marker, group 3 is the first content char (no space)
277            let marker_end = captures.get(2).unwrap().end();
278            let rest = &line[marker_end..];
279            return format!(
280                "{indentation}{} {rest}",
281                &line[captures.get(1).unwrap().end()..marker_end]
282            );
283        }
284
285        // Handle ordered list items
286        if let Some(captures) = ORDERED_LIST_NO_SPACE_PATTERN.captures(line) {
287            let indentation = captures.get(1).map_or("", |m| m.as_str());
288            let marker_end = captures.get(2).unwrap().end();
289            let rest = &line[marker_end..];
290            return format!(
291                "{indentation}{} {rest}",
292                &line[captures.get(1).unwrap().end()..marker_end]
293            );
294        }
295
296        line.to_string()
297    }
298
299    /// Fix a list item with multiple spaces after the marker
300    pub fn fix_list_item_with_multiple_spaces(line: &str) -> String {
301        if let Some(captures) = UNORDERED_LIST_MULTIPLE_SPACE_PATTERN.captures(line) {
302            let leading_space = captures.get(1).map_or("", |m| m.as_str());
303            let marker = captures.get(2).map_or("", |m| m.as_str());
304            let spaces = captures.get(3).map_or("", |m| m.as_str());
305
306            // Get content after multiple spaces
307            let start_pos = leading_space.len() + marker.len() + spaces.len();
308            let content = if start_pos < line.len() { &line[start_pos..] } else { "" };
309
310            // Replace multiple spaces with a single space
311            return format!("{leading_space}{marker} {content}");
312        }
313
314        if let Some(captures) = ORDERED_LIST_MULTIPLE_SPACE_PATTERN.captures(line) {
315            let leading_space = captures.get(1).map_or("", |m| m.as_str());
316            let marker = captures.get(2).map_or("", |m| m.as_str());
317            let spaces = captures.get(3).map_or("", |m| m.as_str());
318
319            // Get content after multiple spaces
320            let start_pos = leading_space.len() + marker.len() + spaces.len();
321            let content = if start_pos < line.len() { &line[start_pos..] } else { "" };
322
323            // Replace multiple spaces with a single space
324            return format!("{leading_space}{marker} {content}");
325        }
326
327        // Return the original line if no pattern matched
328        line.to_string()
329    }
330}
331
/// Coarse list classification returned by the free `is_list_item` function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListType {
    /// The marker is one of `-`, `*` or `+`.
    Unordered,
    /// The marker starts with an ASCII digit (digits followed by `.`).
    Ordered,
}
337
338/// Returns (ListType, matched string, number of spaces after marker) if the line is a list item
339pub fn is_list_item(line: &str) -> Option<(ListType, String, usize)> {
340    let trimmed_line = line.trim();
341    if trimmed_line.is_empty() {
342        return None;
343    }
344    // Horizontal rule check (--- or ***)
345    if trimmed_line.chars().all(|c| c == '-' || c == ' ') && trimmed_line.chars().filter(|&c| c == '-').count() >= 3 {
346        return None;
347    }
348    if trimmed_line.chars().all(|c| c == '*' || c == ' ') && trimmed_line.chars().filter(|&c| c == '*').count() >= 3 {
349        return None;
350    }
351    if let Some(cap) = LIST_REGEX.captures(line) {
352        let marker = &cap[2];
353        let spaces = cap[3].len();
354        let list_type = if marker.chars().next().is_some_and(|c| c.is_ascii_digit()) {
355            ListType::Ordered
356        } else {
357            ListType::Unordered
358        };
359        return Some((list_type, cap[0].to_string(), spaces));
360    }
361    None
362}
363
364/// Returns true if the list item at `lines[current_idx]` is a multi-line item
365pub fn is_multi_line_item(lines: &[&str], current_idx: usize) -> bool {
366    if current_idx >= lines.len() - 1 {
367        return false;
368    }
369    let next_line = lines[current_idx + 1].trim();
370    if next_line.is_empty() {
371        return false;
372    }
373    if is_list_item(next_line).is_some() {
374        return false;
375    }
376    let curr_indent = ListUtils::calculate_indentation(lines[current_idx]);
377    let next_indent = ListUtils::calculate_indentation(lines[current_idx + 1]);
378    next_indent > curr_indent
379}
380
381#[cfg(test)]
382mod tests {
383    use super::*;
384
385    #[test]
386    fn test_is_list_item_without_space() {
387        // Valid list item with space after marker
388        assert!(!ListUtils::is_list_item_without_space("- Item with space"));
389        assert!(!ListUtils::is_list_item_without_space("* Item with space"));
390        assert!(!ListUtils::is_list_item_without_space("+ Item with space"));
391        assert!(!ListUtils::is_list_item_without_space("1. Item with space"));
392
393        // Invalid list items without space after marker (should return true)
394        assert!(ListUtils::is_list_item_without_space("-No space"));
395        assert!(ListUtils::is_list_item_without_space("*No space"));
396        assert!(ListUtils::is_list_item_without_space("+No space"));
397        assert!(ListUtils::is_list_item_without_space("1.No space"));
398
399        // Not list items (should return false)
400        assert!(!ListUtils::is_list_item_without_space("Regular text"));
401        assert!(!ListUtils::is_list_item_without_space(""));
402        assert!(!ListUtils::is_list_item_without_space("    "));
403        assert!(!ListUtils::is_list_item_without_space("# Heading"));
404
405        // Bold/emphasis text that might be confused with list items (should return false)
406        assert!(!ListUtils::is_list_item_without_space("**Bold text**"));
407        assert!(!ListUtils::is_list_item_without_space("__Bold text__"));
408        assert!(!ListUtils::is_list_item_without_space("*Italic text*"));
409        assert!(!ListUtils::is_list_item_without_space("_Italic text_"));
410
411        // Table cells with bold/emphasis (should return false)
412        assert!(!ListUtils::is_list_item_without_space("| **Heading** | Content |"));
413        assert!(!ListUtils::is_list_item_without_space("**Bold** | Normal"));
414        assert!(!ListUtils::is_list_item_without_space("| Cell 1 | **Bold** |"));
415
416        // Horizontal rules (should return false)
417        assert!(!ListUtils::is_list_item_without_space("---"));
418        assert!(!ListUtils::is_list_item_without_space("----------"));
419        assert!(!ListUtils::is_list_item_without_space("   ---   "));
420
421        // Table delimiter rows (should return false)
422        assert!(!ListUtils::is_list_item_without_space("|--------|---------|"));
423        assert!(!ListUtils::is_list_item_without_space("|:-------|:-------:|"));
424        assert!(!ListUtils::is_list_item_without_space("| ------ | ------- |"));
425        assert!(!ListUtils::is_list_item_without_space("---------|----------|"));
426        assert!(!ListUtils::is_list_item_without_space(":--------|:--------:"));
427    }
428
429    #[test]
430    fn test_is_list_item() {
431        // Valid list items
432        assert!(ListUtils::is_list_item("- Item"));
433        assert!(ListUtils::is_list_item("* Item"));
434        assert!(ListUtils::is_list_item("+ Item"));
435        assert!(ListUtils::is_list_item("1. Item"));
436        assert!(ListUtils::is_list_item("  - Indented item"));
437
438        // Not list items
439        assert!(!ListUtils::is_list_item("Regular text"));
440        assert!(!ListUtils::is_list_item(""));
441        assert!(!ListUtils::is_list_item("    "));
442        assert!(!ListUtils::is_list_item("# Heading"));
443        assert!(!ListUtils::is_list_item("**Bold text**"));
444        assert!(!ListUtils::is_list_item("| Cell 1 | Cell 2 |"));
445    }
446
447    #[test]
448    fn test_complex_nested_lists() {
449        // Various indentation levels
450        assert!(ListUtils::is_list_item("- Level 1"));
451        assert!(ListUtils::is_list_item("  - Level 2"));
452        assert!(ListUtils::is_list_item("    - Level 3"));
453        assert!(ListUtils::is_list_item("      - Level 4"));
454        assert!(ListUtils::is_list_item("        - Level 5"));
455
456        // Mixed markers in nested lists
457        assert!(ListUtils::is_list_item("* Main item"));
458        assert!(ListUtils::is_list_item("  - Sub item"));
459        assert!(ListUtils::is_list_item("    + Sub-sub item"));
460        assert!(ListUtils::is_list_item("      * Deep item"));
461
462        // Ordered lists nested in unordered
463        assert!(ListUtils::is_list_item("- Unordered"));
464        assert!(ListUtils::is_list_item("  1. First ordered"));
465        assert!(ListUtils::is_list_item("  2. Second ordered"));
466        assert!(ListUtils::is_list_item("    - Back to unordered"));
467
468        // Tab indentation
469        assert!(ListUtils::is_list_item("\t- Tab indented"));
470        assert!(ListUtils::is_list_item("\t\t- Double tab"));
471        assert!(ListUtils::is_list_item("\t  - Tab plus spaces"));
472        assert!(ListUtils::is_list_item("  \t- Spaces plus tab"));
473    }
474
475    #[test]
476    fn test_parse_list_item_edge_cases() {
477        // Unicode content
478        let unicode_item = ListUtils::parse_list_item("- ๆต‹่ฏ•้กน็›ฎ ๐Ÿš€").unwrap();
479        assert_eq!(unicode_item.content, "ๆต‹่ฏ•้กน็›ฎ ๐Ÿš€");
480
481        // Empty content after marker
482        let empty_item = ListUtils::parse_list_item("- ").unwrap();
483        assert_eq!(empty_item.content, "");
484
485        // Multiple spaces after marker
486        let multi_space = ListUtils::parse_list_item("-   Multiple spaces").unwrap();
487        assert_eq!(multi_space.spaces_after_marker, 3);
488        assert_eq!(multi_space.content, "Multiple spaces");
489
490        // Very long ordered list numbers
491        let long_number = ListUtils::parse_list_item("999999. Item").unwrap();
492        assert_eq!(long_number.marker, "999999.");
493        assert_eq!(long_number.marker_type, ListMarkerType::Ordered);
494
495        // List with only marker - might not parse as valid list
496        if let Some(marker_only) = ListUtils::parse_list_item("*") {
497            assert_eq!(marker_only.content, "");
498            assert_eq!(marker_only.spaces_after_marker, 0);
499        }
500    }
501
502    #[test]
503    fn test_nested_list_detection() {
504        // Test detection of list items at various nesting levels
505        let lines = vec![
506            ("- Item 1", 0),
507            ("  - Item 1.1", 2),
508            ("    - Item 1.1.1", 4),
509            ("      - Item 1.1.1.1", 6),
510            ("    - Item 1.1.2", 4),
511            ("  - Item 1.2", 2),
512            ("- Item 2", 0),
513        ];
514
515        for (line, expected_indent) in lines {
516            let item = ListUtils::parse_list_item(line).unwrap();
517            assert_eq!(item.indentation, expected_indent, "Failed for line: {line}");
518        }
519    }
520
521    #[test]
522    fn test_mixed_list_markers() {
523        // Test different marker types
524        let markers = vec![
525            ("* Asterisk", ListMarkerType::Asterisk),
526            ("+ Plus", ListMarkerType::Plus),
527            ("- Minus", ListMarkerType::Minus),
528            ("1. Ordered", ListMarkerType::Ordered),
529            ("42. Ordered", ListMarkerType::Ordered),
530        ];
531
532        for (line, expected_type) in markers {
533            let item = ListUtils::parse_list_item(line).unwrap();
534            assert_eq!(item.marker_type, expected_type, "Failed for line: {line}");
535        }
536    }
537
538    #[test]
539    fn test_list_item_without_space_edge_cases() {
540        // Edge cases for missing spaces
541        assert!(ListUtils::is_list_item_without_space("*a"));
542        assert!(ListUtils::is_list_item_without_space("+b"));
543        assert!(ListUtils::is_list_item_without_space("-c"));
544        assert!(ListUtils::is_list_item_without_space("1.d"));
545
546        // Single character lines
547        assert!(!ListUtils::is_list_item_without_space("*"));
548        assert!(!ListUtils::is_list_item_without_space("+"));
549        assert!(!ListUtils::is_list_item_without_space("-"));
550
551        // Markers at end of line
552        assert!(!ListUtils::is_list_item_without_space("Text ends with -"));
553        assert!(!ListUtils::is_list_item_without_space("Text ends with *"));
554        assert!(!ListUtils::is_list_item_without_space("Number ends with 1."));
555    }
556
557    #[test]
558    fn test_list_item_with_multiple_spaces() {
559        // Test detection of multiple spaces after marker
560        assert!(ListUtils::is_list_item_with_multiple_spaces("-  Two spaces"));
561        assert!(ListUtils::is_list_item_with_multiple_spaces("*   Three spaces"));
562        assert!(ListUtils::is_list_item_with_multiple_spaces("+    Four spaces"));
563        assert!(ListUtils::is_list_item_with_multiple_spaces("1.  Two spaces"));
564
565        // Should not match single space
566        assert!(!ListUtils::is_list_item_with_multiple_spaces("- One space"));
567        assert!(!ListUtils::is_list_item_with_multiple_spaces("* One space"));
568        assert!(!ListUtils::is_list_item_with_multiple_spaces("+ One space"));
569        assert!(!ListUtils::is_list_item_with_multiple_spaces("1. One space"));
570    }
571
572    #[test]
573    fn test_complex_content_in_lists() {
574        // List items with inline formatting
575        let bold_item = ListUtils::parse_list_item("- **Bold** content").unwrap();
576        assert_eq!(bold_item.content, "**Bold** content");
577
578        let link_item = ListUtils::parse_list_item("* [Link](url) in list").unwrap();
579        assert_eq!(link_item.content, "[Link](url) in list");
580
581        let code_item = ListUtils::parse_list_item("+ Item with `code`").unwrap();
582        assert_eq!(code_item.content, "Item with `code`");
583
584        // List with inline HTML
585        let html_item = ListUtils::parse_list_item("- Item with <span>HTML</span>").unwrap();
586        assert_eq!(html_item.content, "Item with <span>HTML</span>");
587
588        // List with emoji
589        let emoji_item = ListUtils::parse_list_item("1. ๐ŸŽ‰ Party time!").unwrap();
590        assert_eq!(emoji_item.content, "๐ŸŽ‰ Party time!");
591    }
592
593    #[test]
594    fn test_ambiguous_list_markers() {
595        // Test cases that might be ambiguous
596
597        // Arithmetic expressions should not be lists
598        assert!(!ListUtils::is_list_item("2 + 2 = 4"));
599        assert!(!ListUtils::is_list_item("5 - 3 = 2"));
600        assert!(!ListUtils::is_list_item("3 * 3 = 9"));
601
602        // Emphasis markers should not be lists
603        assert!(!ListUtils::is_list_item("*emphasis*"));
604        assert!(!ListUtils::is_list_item("**strong**"));
605        assert!(!ListUtils::is_list_item("***strong emphasis***"));
606
607        // Date ranges
608        assert!(!ListUtils::is_list_item("2023-01-01 - 2023-12-31"));
609
610        // But these should be lists
611        assert!(ListUtils::is_list_item("- 2023-01-01 - 2023-12-31"));
612        assert!(ListUtils::is_list_item("* emphasis text here"));
613    }
614
615    #[test]
616    fn test_deeply_nested_complex_lists() {
617        let complex_doc = vec![
618            "- Top level item",
619            "  - Second level with **bold**",
620            "    1. Ordered item with `code`",
621            "    2. Another ordered item",
622            "      - Back to unordered [link](url)",
623            "        * Different marker",
624            "          + Yet another marker",
625            "            - Maximum nesting?",
626            "              1. Can we go deeper?",
627            "                - Apparently yes!",
628        ];
629
630        for line in complex_doc {
631            assert!(ListUtils::is_list_item(line), "Failed to recognize: {line}");
632            let item = ListUtils::parse_list_item(line).unwrap();
633            assert!(
634                !item.content.is_empty()
635                    || line.trim().ends_with('-')
636                    || line.trim().ends_with('*')
637                    || line.trim().ends_with('+')
638            );
639        }
640    }
641
642    #[test]
643    fn test_parse_list_item_comprehensive() {
644        // Test the comprehensive parsing with expected values
645        let test_cases = vec![
646            ("- Simple item", 0, ListMarkerType::Minus, "-", "Simple item"),
647            ("  * Indented", 2, ListMarkerType::Asterisk, "*", "Indented"),
648            ("    1. Ordered", 4, ListMarkerType::Ordered, "1.", "Ordered"),
649            ("\t+ Tab indent", 4, ListMarkerType::Plus, "+", "Tab indent"), // Tab counts as 4 spaces per CommonMark
650        ];
651
652        for (line, expected_indent, expected_type, expected_marker, expected_content) in test_cases {
653            let item = ListUtils::parse_list_item(line);
654            assert!(item.is_some(), "Failed to parse: {line}");
655            let item = item.unwrap();
656            assert_eq!(item.indentation, expected_indent, "Wrong indentation for: {line}");
657            assert_eq!(item.marker_type, expected_type, "Wrong marker type for: {line}");
658            assert_eq!(item.marker, expected_marker, "Wrong marker for: {line}");
659            assert_eq!(item.content, expected_content, "Wrong content for: {line}");
660        }
661    }
662
663    #[test]
664    fn test_special_characters_in_lists() {
665        // Test with special characters that might break regex
666        let special_cases = vec![
667            "- Item with $ dollar sign",
668            "* Item with ^ caret",
669            "+ Item with \\ backslash",
670            "- Item with | pipe",
671            "1. Item with ( ) parentheses",
672            "2. Item with [ ] brackets",
673            "3. Item with { } braces",
674        ];
675
676        for line in special_cases {
677            assert!(ListUtils::is_list_item(line), "Failed for: {line}");
678            let item = ListUtils::parse_list_item(line);
679            assert!(item.is_some(), "Failed to parse: {line}");
680        }
681    }
682
683    #[test]
684    fn test_list_continuations() {
685        // Lists that continue on multiple lines (not directly supported but shouldn't crash)
686        let continuation = "- This is a very long list item that \
687                           continues on the next line";
688        assert!(ListUtils::is_list_item(continuation));
689
690        // Indented continuation
691        let indented_cont = "  - Another long item that \
692                               continues with proper indentation";
693        assert!(ListUtils::is_list_item(indented_cont));
694    }
695
696    #[test]
697    fn test_performance_edge_cases() {
698        // Very long lines
699        let long_content = "x".repeat(10000);
700        let long_line = format!("- {long_content}");
701        assert!(ListUtils::is_list_item(&long_line));
702
703        // Many spaces
704        let many_spaces = " ".repeat(100);
705        let spaced_line = format!("{many_spaces}- Item");
706        assert!(ListUtils::is_list_item(&spaced_line));
707
708        // Large ordered number
709        let big_number = format!("{}. Item", "9".repeat(20));
710        assert!(ListUtils::is_list_item(&big_number));
711    }
712
713    #[test]
714    fn test_is_unordered_list_item() {
715        // Valid unordered list items
716        assert!(ListUtils::is_unordered_list_item("- Item"));
717        assert!(ListUtils::is_unordered_list_item("* Item"));
718        assert!(ListUtils::is_unordered_list_item("+ Item"));
719
720        // Invalid - ordered lists
721        assert!(!ListUtils::is_unordered_list_item("1. Item"));
722        assert!(!ListUtils::is_unordered_list_item("99. Item"));
723
724        // Invalid - no space after marker
725        assert!(!ListUtils::is_unordered_list_item("-Item"));
726        assert!(!ListUtils::is_unordered_list_item("*Item"));
727        assert!(!ListUtils::is_unordered_list_item("+Item"));
728    }
729
730    #[test]
731    fn test_calculate_indentation() {
732        // Test that tabs are counted as 4 spaces
733        assert_eq!(ListUtils::calculate_indentation(""), 0);
734        assert_eq!(ListUtils::calculate_indentation("    "), 4);
735        assert_eq!(ListUtils::calculate_indentation("\t"), 4);
736        assert_eq!(ListUtils::calculate_indentation("\t\t"), 8);
737        assert_eq!(ListUtils::calculate_indentation("  \t"), 6); // 2 spaces + 1 tab
738        assert_eq!(ListUtils::calculate_indentation("\t  "), 6); // 1 tab + 2 spaces
739        assert_eq!(ListUtils::calculate_indentation("\t\t  "), 10); // 2 tabs + 2 spaces
740        assert_eq!(ListUtils::calculate_indentation("  \t  \t"), 12); // 2 spaces + tab + 2 spaces + tab
741    }
742
743    #[test]
744    fn test_is_ordered_list_item() {
745        // Valid ordered list items
746        assert!(ListUtils::is_ordered_list_item("1. Item"));
747        assert!(ListUtils::is_ordered_list_item("99. Item"));
748        assert!(ListUtils::is_ordered_list_item("1234567890. Item"));
749
750        // Invalid - unordered lists
751        assert!(!ListUtils::is_ordered_list_item("- Item"));
752        assert!(!ListUtils::is_ordered_list_item("* Item"));
753        assert!(!ListUtils::is_ordered_list_item("+ Item"));
754
755        // Invalid - no space after period
756        assert!(!ListUtils::is_ordered_list_item("1.Item"));
757        assert!(!ListUtils::is_ordered_list_item("99.Item"));
758    }
759}