rumdl_lib/rules/
list_utils.rs

1use fancy_regex::Regex as FancyRegex;
2use lazy_static::lazy_static;
3use regex::Regex;
4
5lazy_static! {
    // All patterns below are anchored at the start of the line (`^`) and begin with
    // capture group 1 = leading whitespace.
6    // Well-formed list items: group 2 = marker, group 3 = whitespace after the marker (at least one)
7    static ref UNORDERED_LIST_PATTERN: Regex = Regex::new(r"^(\s*)([*+-])(\s+)").unwrap();
8    static ref ORDERED_LIST_PATTERN: Regex = Regex::new(r"^(\s*)(\d+\.)(\s+)").unwrap();
9
10    // Patterns for list markers immediately followed by content (no space), excluding `**` emphasis.
    // NOTE: in the unordered pattern the marker sits in a NON-capturing group, so group 2 is the
    // first content character and there is no group 3. It needs fancy_regex for the look-around.
    // In the ordered pattern group 2 is the marker and group 3 is only the first content character.
11    static ref UNORDERED_LIST_NO_SPACE_PATTERN: FancyRegex = FancyRegex::new(r"^(\s*)(?:(?<!\*)\*(?!\*)|[+-])([^\s\*])").unwrap();
12    static ref ORDERED_LIST_NO_SPACE_PATTERN: Regex = Regex::new(r"^(\s*)(\d+\.)([^\s])").unwrap();
13
14    // Patterns for list markers followed by two or more whitespace characters (group 3)
15    static ref UNORDERED_LIST_MULTIPLE_SPACE_PATTERN: Regex = Regex::new(r"^(\s*)([*+-])(\s{2,})").unwrap();
16    static ref ORDERED_LIST_MULTIPLE_SPACE_PATTERN: Regex = Regex::new(r"^(\s*)(\d+\.)(\s{2,})").unwrap();
17
18    // Regex to capture list markers (group 2) and the whitespace *after* them (group 3).
    // Group 3 is `\s*`, so this also matches a marker with no following space at all.
19    pub static ref LIST_REGEX: Regex = Regex::new(r"^(\s*)([-*+]|\d+\.)(\s*)").unwrap();
20}
21
/// The kind of marker that introduces a Markdown list item.
///
/// The three bullet characters are kept distinct; every numbered marker
/// (`1.`, `42.`, ...) maps to [`ListMarkerType::Ordered`].
///
/// The enum is a plain tag with no payload, so it is `Copy` and `Eq`
/// (matching `ListType` in this module) and can be passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListMarkerType {
    /// Bullet item introduced by `*`.
    Asterisk,
    /// Bullet item introduced by `+`.
    Plus,
    /// Bullet item introduced by `-`.
    Minus,
    /// Numbered item introduced by digits followed by `.`.
    Ordered,
}
30
31/// A list item parsed from a single line, as produced by `ListUtils::parse_list_item`.
32#[derive(Debug, Clone)]
33pub struct ListItem {
    /// Width of the leading whitespace as computed by `ListUtils::calculate_indentation`
    /// (each tab counts as 4, every other whitespace character as 1).
34    pub indentation: usize,
    /// Which kind of marker introduced the item.
35    pub marker_type: ListMarkerType,
    /// The marker text exactly as written, e.g. `-`, `*`, `+`, or `12.`.
36    pub marker: String,
    /// Everything on the line after the marker and the whitespace that follows it.
37    pub content: String,
    /// Length in bytes of the whitespace run between the marker and the content.
38    pub spaces_after_marker: usize,
39}
40
41/// Utility functions for detecting and handling lists in Markdown documents.
///
/// This is a unit struct used purely as a namespace: it holds no data and its
/// helpers are called as `ListUtils::function(...)`.
42pub struct ListUtils;
43
44impl ListUtils {
45    /// Calculate indentation level, counting tabs as 4 spaces per CommonMark spec
46    pub fn calculate_indentation(s: &str) -> usize {
47        s.chars()
48            .take_while(|c| c.is_whitespace())
49            .map(|c| if c == '\t' { 4 } else { 1 })
50            .sum()
51    }
52
53    /// Check if a line is a list item
54    pub fn is_list_item(line: &str) -> bool {
55        // Fast path for common cases
56        if line.is_empty() {
57            return false;
58        }
59
60        let trimmed = line.trim_start();
61        if trimmed.is_empty() {
62            return false;
63        }
64
65        // Quick literal check for common list markers
66        let first_char = trimmed.chars().next().unwrap();
67        match first_char {
68            '*' | '+' | '-' => {
69                if trimmed.len() > 1 {
70                    let second_char = trimmed.chars().nth(1).unwrap();
71                    return second_char.is_whitespace();
72                }
73                false
74            }
75            '0'..='9' => {
76                // Check for ordered list pattern using a literal search first
77                let dot_pos = trimmed.find('.');
78                if let Some(pos) = dot_pos
79                    && pos > 0
80                    && pos < trimmed.len() - 1
81                {
82                    let after_dot = &trimmed[pos + 1..];
83                    return after_dot.starts_with(' ');
84                }
85                false
86            }
87            _ => false,
88        }
89    }
90
91    /// Check if a line is an unordered list item
92    pub fn is_unordered_list_item(line: &str) -> bool {
93        // Fast path for common cases
94        if line.is_empty() {
95            return false;
96        }
97
98        let trimmed = line.trim_start();
99        if trimmed.is_empty() {
100            return false;
101        }
102
103        // Quick literal check for unordered list markers
104        let first_char = trimmed.chars().next().unwrap();
105        if (first_char == '*' || first_char == '+' || first_char == '-') && trimmed.len() > 1 {
106            let second_char = trimmed.chars().nth(1).unwrap();
107            return second_char.is_whitespace();
108        }
109
110        false
111    }
112
113    /// Check if a line is an ordered list item
114    pub fn is_ordered_list_item(line: &str) -> bool {
115        // Fast path for common cases
116        if line.is_empty() {
117            return false;
118        }
119
120        let trimmed = line.trim_start();
121        if trimmed.is_empty() || !trimmed.chars().next().unwrap().is_ascii_digit() {
122            return false;
123        }
124
125        // Check for ordered list pattern using a literal search
126        let dot_pos = trimmed.find('.');
127        if let Some(pos) = dot_pos
128            && pos > 0
129            && pos < trimmed.len() - 1
130        {
131            let after_dot = &trimmed[pos + 1..];
132            return after_dot.starts_with(' ');
133        }
134
135        false
136    }
137
138    /// Check if a line is a list item without proper spacing after the marker
139    pub fn is_list_item_without_space(line: &str) -> bool {
140        // Skip lines that start with double asterisks (bold text)
141        if line.trim_start().starts_with("**") {
142            return false;
143        }
144
145        // Skip lines that have bold/emphasis markers (typically table cells with bold text)
146        if line.trim_start().contains("**") || line.trim_start().contains("__") {
147            return false;
148        }
149
150        // Skip lines that are part of a Markdown table (contain |)
151        if line.contains('|') {
152            return false;
153        }
154
155        // Skip lines that are horizontal rules or table delimiter rows
156        let trimmed = line.trim();
157        if !trimmed.is_empty() {
158            // Check for horizontal rules (only dashes and whitespace)
159            if trimmed.chars().all(|c| c == '-' || c.is_whitespace()) {
160                return false;
161            }
162
163            // Check for table delimiter rows without pipes (e.g., in cases where pipes are optional)
164            // These have dashes and possibly colons for alignment
165            if trimmed.contains('-') && trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) {
166                return false;
167            }
168        }
169
170        // Skip lines that are part of emphasis/bold text
171        if line.trim_start().matches('*').count() >= 2 {
172            return false;
173        }
174
175        // Handle potential regex errors gracefully
176        UNORDERED_LIST_NO_SPACE_PATTERN.is_match(line).unwrap_or(false) || ORDERED_LIST_NO_SPACE_PATTERN.is_match(line)
177    }
178
179    /// Check if a line is a list item with multiple spaces after the marker
180    pub fn is_list_item_with_multiple_spaces(line: &str) -> bool {
181        UNORDERED_LIST_MULTIPLE_SPACE_PATTERN.is_match(line) || ORDERED_LIST_MULTIPLE_SPACE_PATTERN.is_match(line)
182    }
183
184    /// Parse a line as a list item
185    pub fn parse_list_item(line: &str) -> Option<ListItem> {
186        // First try to match unordered list pattern
187        if let Some(captures) = UNORDERED_LIST_PATTERN.captures(line) {
188            let indentation = captures.get(1).map_or(0, |m| Self::calculate_indentation(m.as_str()));
189            let marker = captures.get(2).unwrap().as_str();
190            let spaces = captures.get(3).map_or(0, |m| m.as_str().len());
191            let raw_indentation = captures.get(1).map_or(0, |m| m.as_str().len());
192            let content_start = raw_indentation + marker.len() + spaces;
193            let content = if content_start < line.len() {
194                line[content_start..].to_string()
195            } else {
196                String::new()
197            };
198
199            let marker_type = match marker {
200                "*" => ListMarkerType::Asterisk,
201                "+" => ListMarkerType::Plus,
202                "-" => ListMarkerType::Minus,
203                _ => unreachable!(), // Regex ensures this
204            };
205
206            return Some(ListItem {
207                indentation,
208                marker_type,
209                marker: marker.to_string(),
210                content,
211                spaces_after_marker: spaces,
212            });
213        }
214
215        // Then try to match ordered list pattern
216        if let Some(captures) = ORDERED_LIST_PATTERN.captures(line) {
217            let indentation = captures.get(1).map_or(0, |m| Self::calculate_indentation(m.as_str()));
218            let marker = captures.get(2).unwrap().as_str();
219            let spaces = captures.get(3).map_or(0, |m| m.as_str().len());
220            let raw_indentation = captures.get(1).map_or(0, |m| m.as_str().len());
221            let content_start = raw_indentation + marker.len() + spaces;
222            let content = if content_start < line.len() {
223                line[content_start..].to_string()
224            } else {
225                String::new()
226            };
227
228            return Some(ListItem {
229                indentation,
230                marker_type: ListMarkerType::Ordered,
231                marker: marker.to_string(),
232                content,
233                spaces_after_marker: spaces,
234            });
235        }
236
237        None
238    }
239
240    /// Check if a line is a continuation of a list item
241    pub fn is_list_continuation(line: &str, prev_list_item: &ListItem) -> bool {
242        if line.trim().is_empty() {
243            return false;
244        }
245
246        // Calculate indentation level properly (tabs = 4 spaces)
247        let indentation = Self::calculate_indentation(line);
248
249        // Continuation should be indented at least as much as the content of the previous item
250        let min_indent = prev_list_item.indentation + prev_list_item.marker.len() + prev_list_item.spaces_after_marker;
251        indentation >= min_indent && !Self::is_list_item(line)
252    }
253
254    /// Fix a list item without proper spacing
255    pub fn fix_list_item_without_space(line: &str) -> String {
256        // Handle unordered list items
257        if let Ok(Some(captures)) = UNORDERED_LIST_NO_SPACE_PATTERN.captures(line) {
258            let indentation = captures.get(1).map_or("", |m| m.as_str());
259            let marker = captures.get(2).map_or("", |m| m.as_str());
260            let content = captures.get(3).map_or("", |m| m.as_str());
261            return format!("{indentation}{marker} {content}");
262        }
263
264        // Handle ordered list items
265        if let Some(captures) = ORDERED_LIST_NO_SPACE_PATTERN.captures(line) {
266            let indentation = captures.get(1).map_or("", |m| m.as_str());
267            let marker = captures.get(2).map_or("", |m| m.as_str());
268            let content = captures.get(3).map_or("", |m| m.as_str());
269            return format!("{indentation}{marker} {content}");
270        }
271
272        line.to_string()
273    }
274
275    /// Fix a list item with multiple spaces after the marker
276    pub fn fix_list_item_with_multiple_spaces(line: &str) -> String {
277        if let Some(captures) = UNORDERED_LIST_MULTIPLE_SPACE_PATTERN.captures(line) {
278            let leading_space = captures.get(1).map_or("", |m| m.as_str());
279            let marker = captures.get(2).map_or("", |m| m.as_str());
280            let spaces = captures.get(3).map_or("", |m| m.as_str());
281
282            // Get content after multiple spaces
283            let start_pos = leading_space.len() + marker.len() + spaces.len();
284            let content = if start_pos < line.len() { &line[start_pos..] } else { "" };
285
286            // Replace multiple spaces with a single space
287            return format!("{leading_space}{marker} {content}");
288        }
289
290        if let Some(captures) = ORDERED_LIST_MULTIPLE_SPACE_PATTERN.captures(line) {
291            let leading_space = captures.get(1).map_or("", |m| m.as_str());
292            let marker = captures.get(2).map_or("", |m| m.as_str());
293            let spaces = captures.get(3).map_or("", |m| m.as_str());
294
295            // Get content after multiple spaces
296            let start_pos = leading_space.len() + marker.len() + spaces.len();
297            let content = if start_pos < line.len() { &line[start_pos..] } else { "" };
298
299            // Replace multiple spaces with a single space
300            return format!("{leading_space}{marker} {content}");
301        }
302
303        // Return the original line if no pattern matched
304        line.to_string()
305    }
306}
307
/// Coarse classification of a list item, as returned by the free function `is_list_item`.
308#[derive(Debug, Clone, Copy, PartialEq, Eq)]
309pub enum ListType {
    /// The marker is a bullet character (`-`, `*` or `+`).
310    Unordered,
    /// The marker starts with an ASCII digit (e.g. `1.`).
311    Ordered,
312}
313
314/// Returns (ListType, matched string, number of spaces after marker) if the line is a list item
315pub fn is_list_item(line: &str) -> Option<(ListType, String, usize)> {
316    let trimmed_line = line.trim();
317    if trimmed_line.is_empty() {
318        return None;
319    }
320    // Horizontal rule check (--- or ***)
321    if trimmed_line.chars().all(|c| c == '-' || c == ' ') && trimmed_line.chars().filter(|&c| c == '-').count() >= 3 {
322        return None;
323    }
324    if trimmed_line.chars().all(|c| c == '*' || c == ' ') && trimmed_line.chars().filter(|&c| c == '*').count() >= 3 {
325        return None;
326    }
327    if let Some(cap) = LIST_REGEX.captures(line) {
328        let marker = &cap[2];
329        let spaces = cap[3].len();
330        let list_type = if marker.chars().next().is_some_and(|c| c.is_ascii_digit()) {
331            ListType::Ordered
332        } else {
333            ListType::Unordered
334        };
335        return Some((list_type, cap[0].to_string(), spaces));
336    }
337    None
338}
339
340/// Returns true if the list item at lines[current_idx] is a multi-line item
341pub fn is_multi_line_item(lines: &[&str], current_idx: usize) -> bool {
342    if current_idx >= lines.len() - 1 {
343        return false;
344    }
345    let next_line = lines[current_idx + 1].trim();
346    if next_line.is_empty() {
347        return false;
348    }
349    if is_list_item(next_line).is_some() {
350        return false;
351    }
352    let curr_indent = ListUtils::calculate_indentation(lines[current_idx]);
353    let next_indent = ListUtils::calculate_indentation(lines[current_idx + 1]);
354    next_indent > curr_indent
355}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `check` returns `expected` for every line in `lines`,
    /// naming the offending line on failure.
    fn assert_all(check: fn(&str) -> bool, expected: bool, lines: &[&str]) {
        for line in lines {
            assert_eq!(check(line), expected, "unexpected result for line: {line:?}");
        }
    }

    #[test]
    fn test_is_list_item_without_space() {
        let check = ListUtils::is_list_item_without_space;

        // Valid list items with a space after the marker
        assert_all(
            check,
            false,
            &["- Item with space", "* Item with space", "+ Item with space", "1. Item with space"],
        );

        // Invalid list items without space after marker (should return true)
        assert_all(check, true, &["-No space", "*No space", "+No space", "1.No space"]);

        // Not list items (should return false)
        assert_all(check, false, &["Regular text", "", "    ", "# Heading"]);

        // Bold/emphasis text that might be confused with list items (should return false)
        assert_all(
            check,
            false,
            &["**Bold text**", "__Bold text__", "*Italic text*", "_Italic text_"],
        );

        // Table cells with bold/emphasis (should return false)
        assert_all(
            check,
            false,
            &["| **Heading** | Content |", "**Bold** | Normal", "| Cell 1 | **Bold** |"],
        );

        // Horizontal rules (should return false)
        assert_all(check, false, &["---", "----------", "   ---   "]);

        // Table delimiter rows (should return false)
        assert_all(
            check,
            false,
            &[
                "|--------|---------|",
                "|:-------|:-------:|",
                "| ------ | ------- |",
                "---------|----------|",
                ":--------|:--------:",
            ],
        );
    }

    #[test]
    fn test_is_list_item() {
        // Valid list items
        assert_all(
            ListUtils::is_list_item,
            true,
            &["- Item", "* Item", "+ Item", "1. Item", "  - Indented item"],
        );

        // Not list items
        assert_all(
            ListUtils::is_list_item,
            false,
            &["Regular text", "", "    ", "# Heading", "**Bold text**", "| Cell 1 | Cell 2 |"],
        );
    }

    #[test]
    fn test_complex_nested_lists() {
        let check = ListUtils::is_list_item;

        // Various indentation levels
        assert_all(
            check,
            true,
            &["- Level 1", "  - Level 2", "    - Level 3", "      - Level 4", "        - Level 5"],
        );

        // Mixed markers in nested lists
        assert_all(
            check,
            true,
            &["* Main item", "  - Sub item", "    + Sub-sub item", "      * Deep item"],
        );

        // Ordered lists nested in unordered
        assert_all(
            check,
            true,
            &["- Unordered", "  1. First ordered", "  2. Second ordered", "    - Back to unordered"],
        );

        // Tab indentation
        assert_all(
            check,
            true,
            &["\t- Tab indented", "\t\t- Double tab", "\t  - Tab plus spaces", "  \t- Spaces plus tab"],
        );
    }

    #[test]
    fn test_parse_list_item_edge_cases() {
        // Unicode content (multi-byte CJK characters and an emoji)
        let unicode_item = ListUtils::parse_list_item("- 测试项目 🚀").unwrap();
        assert_eq!(unicode_item.content, "测试项目 🚀");

        // Empty content after marker
        let empty_item = ListUtils::parse_list_item("- ").unwrap();
        assert_eq!(empty_item.content, "");

        // Multiple spaces after marker
        let multi_space = ListUtils::parse_list_item("-   Multiple spaces").unwrap();
        assert_eq!(multi_space.spaces_after_marker, 3);
        assert_eq!(multi_space.content, "Multiple spaces");

        // Very long ordered list numbers
        let long_number = ListUtils::parse_list_item("999999. Item").unwrap();
        assert_eq!(long_number.marker, "999999.");
        assert_eq!(long_number.marker_type, ListMarkerType::Ordered);

        // List with only marker - might not parse as valid list
        if let Some(marker_only) = ListUtils::parse_list_item("*") {
            assert_eq!(marker_only.content, "");
            assert_eq!(marker_only.spaces_after_marker, 0);
        }
    }

    #[test]
    fn test_nested_list_detection() {
        // Test detection of list items at various nesting levels
        let lines = [
            ("- Item 1", 0),
            ("  - Item 1.1", 2),
            ("    - Item 1.1.1", 4),
            ("      - Item 1.1.1.1", 6),
            ("    - Item 1.1.2", 4),
            ("  - Item 1.2", 2),
            ("- Item 2", 0),
        ];

        for (line, expected_indent) in lines {
            let item = ListUtils::parse_list_item(line).unwrap();
            assert_eq!(item.indentation, expected_indent, "Failed for line: {line}");
        }
    }

    #[test]
    fn test_mixed_list_markers() {
        // Test different marker types
        let markers = [
            ("* Asterisk", ListMarkerType::Asterisk),
            ("+ Plus", ListMarkerType::Plus),
            ("- Minus", ListMarkerType::Minus),
            ("1. Ordered", ListMarkerType::Ordered),
            ("42. Ordered", ListMarkerType::Ordered),
        ];

        for (line, expected_type) in markers {
            let item = ListUtils::parse_list_item(line).unwrap();
            assert_eq!(item.marker_type, expected_type, "Failed for line: {line}");
        }
    }

    #[test]
    fn test_list_item_without_space_edge_cases() {
        let check = ListUtils::is_list_item_without_space;

        // Edge cases for missing spaces
        assert_all(check, true, &["*a", "+b", "-c", "1.d"]);

        // Single character lines
        assert_all(check, false, &["*", "+", "-"]);

        // Markers at end of line
        assert_all(
            check,
            false,
            &["Text ends with -", "Text ends with *", "Number ends with 1."],
        );
    }

    #[test]
    fn test_list_item_with_multiple_spaces() {
        let check = ListUtils::is_list_item_with_multiple_spaces;

        // Test detection of multiple spaces after marker
        assert_all(
            check,
            true,
            &["-  Two spaces", "*   Three spaces", "+    Four spaces", "1.  Two spaces"],
        );

        // Should not match single space
        assert_all(
            check,
            false,
            &["- One space", "* One space", "+ One space", "1. One space"],
        );
    }

    #[test]
    fn test_complex_content_in_lists() {
        // (line, expected content): inline formatting, links, code, HTML and emoji
        let cases = [
            ("- **Bold** content", "**Bold** content"),
            ("* [Link](url) in list", "[Link](url) in list"),
            ("+ Item with `code`", "Item with `code`"),
            ("- Item with <span>HTML</span>", "Item with <span>HTML</span>"),
            ("1. 🎉 Party time!", "🎉 Party time!"),
        ];

        for (line, expected_content) in cases {
            let item = ListUtils::parse_list_item(line).unwrap();
            assert_eq!(item.content, expected_content, "Wrong content for: {line}");
        }
    }

    #[test]
    fn test_ambiguous_list_markers() {
        let check = ListUtils::is_list_item;

        // Arithmetic expressions should not be lists
        assert_all(check, false, &["2 + 2 = 4", "5 - 3 = 2", "3 * 3 = 9"]);

        // Emphasis markers should not be lists
        assert_all(check, false, &["*emphasis*", "**strong**", "***strong emphasis***"]);

        // Date ranges
        assert_all(check, false, &["2023-01-01 - 2023-12-31"]);

        // But these should be lists
        assert_all(check, true, &["- 2023-01-01 - 2023-12-31", "* emphasis text here"]);
    }

    #[test]
    fn test_deeply_nested_complex_lists() {
        let complex_doc = [
            "- Top level item",
            "  - Second level with **bold**",
            "    1. Ordered item with `code`",
            "    2. Another ordered item",
            "      - Back to unordered [link](url)",
            "        * Different marker",
            "          + Yet another marker",
            "            - Maximum nesting?",
            "              1. Can we go deeper?",
            "                - Apparently yes!",
        ];

        for line in complex_doc {
            assert!(ListUtils::is_list_item(line), "Failed to recognize: {line}");
            let item = ListUtils::parse_list_item(line).unwrap();
            assert!(
                !item.content.is_empty()
                    || line.trim().ends_with('-')
                    || line.trim().ends_with('*')
                    || line.trim().ends_with('+')
            );
        }
    }

    #[test]
    fn test_parse_list_item_comprehensive() {
        // (line, indentation, marker type, marker, content)
        let test_cases = [
            ("- Simple item", 0, ListMarkerType::Minus, "-", "Simple item"),
            ("  * Indented", 2, ListMarkerType::Asterisk, "*", "Indented"),
            ("    1. Ordered", 4, ListMarkerType::Ordered, "1.", "Ordered"),
            // A tab counts as 4 columns of indentation
            ("\t+ Tab indent", 4, ListMarkerType::Plus, "+", "Tab indent"),
        ];

        for (line, expected_indent, expected_type, expected_marker, expected_content) in test_cases {
            let item = ListUtils::parse_list_item(line);
            assert!(item.is_some(), "Failed to parse: {line}");
            let item = item.unwrap();
            assert_eq!(item.indentation, expected_indent, "Wrong indentation for: {line}");
            assert_eq!(item.marker_type, expected_type, "Wrong marker type for: {line}");
            assert_eq!(item.marker, expected_marker, "Wrong marker for: {line}");
            assert_eq!(item.content, expected_content, "Wrong content for: {line}");
        }
    }

    #[test]
    fn test_special_characters_in_lists() {
        // Test with special characters that might break regex
        let special_cases = [
            "- Item with $ dollar sign",
            "* Item with ^ caret",
            "+ Item with \\ backslash",
            "- Item with | pipe",
            "1. Item with ( ) parentheses",
            "2. Item with [ ] brackets",
            "3. Item with { } braces",
        ];

        for line in special_cases {
            assert!(ListUtils::is_list_item(line), "Failed for: {line}");
            let item = ListUtils::parse_list_item(line);
            assert!(item.is_some(), "Failed to parse: {line}");
        }
    }

    #[test]
    fn test_list_continuations() {
        // Long items written across source lines; the trailing backslash joins them
        // into a single line, so each value is one list-item line.
        let continuation = "- This is a very long list item that \
                           continues on the next line";
        assert!(ListUtils::is_list_item(continuation));

        // Indented item
        let indented_cont = "  - Another long item that \
                               continues with proper indentation";
        assert!(ListUtils::is_list_item(indented_cont));
    }

    #[test]
    fn test_performance_edge_cases() {
        // Very long lines
        let long_content = "x".repeat(10000);
        let long_line = format!("- {long_content}");
        assert!(ListUtils::is_list_item(&long_line));

        // Many spaces
        let many_spaces = " ".repeat(100);
        let spaced_line = format!("{many_spaces}- Item");
        assert!(ListUtils::is_list_item(&spaced_line));

        // Large ordered number
        let big_number = format!("{}. Item", "9".repeat(20));
        assert!(ListUtils::is_list_item(&big_number));
    }

    #[test]
    fn test_is_unordered_list_item() {
        let check = ListUtils::is_unordered_list_item;

        // Valid unordered list items
        assert_all(check, true, &["- Item", "* Item", "+ Item"]);

        // Invalid - ordered lists
        assert_all(check, false, &["1. Item", "99. Item"]);

        // Invalid - no space after marker
        assert_all(check, false, &["-Item", "*Item", "+Item"]);
    }

    #[test]
    fn test_calculate_indentation() {
        // (input, expected width): tabs are counted as 4 spaces
        let cases = [
            ("", 0),
            ("    ", 4),
            ("\t", 4),
            ("\t\t", 8),
            ("  \t", 6),      // 2 spaces + 1 tab
            ("\t  ", 6),      // 1 tab + 2 spaces
            ("\t\t  ", 10),   // 2 tabs + 2 spaces
            ("  \t  \t", 12), // 2 spaces + tab + 2 spaces + tab
        ];

        for (input, expected) in cases {
            assert_eq!(
                ListUtils::calculate_indentation(input),
                expected,
                "Wrong indentation for: {input:?}"
            );
        }
    }

    #[test]
    fn test_is_ordered_list_item() {
        let check = ListUtils::is_ordered_list_item;

        // Valid ordered list items
        assert_all(check, true, &["1. Item", "99. Item", "1234567890. Item"]);

        // Invalid - unordered lists
        assert_all(check, false, &["- Item", "* Item", "+ Item"]);

        // Invalid - no space after period
        assert_all(check, false, &["1.Item", "99.Item"]);
    }
}