rumdl_lib/rules/
list_utils.rs

1use fancy_regex::Regex as FancyRegex;
2use lazy_static::lazy_static;
3use regex::Regex;
4
5lazy_static! {
6    // Optimized list detection patterns with anchors and non-capturing groups
7    static ref UNORDERED_LIST_PATTERN: Regex = Regex::new(r"^(\s*)([*+-])(\s+)").unwrap();
8    static ref ORDERED_LIST_PATTERN: Regex = Regex::new(r"^(\s*)(\d+\.)(\s+)").unwrap();
9
10    // Patterns for lists without proper spacing - now excluding emphasis markers
11    static ref UNORDERED_LIST_NO_SPACE_PATTERN: FancyRegex = FancyRegex::new(r"^(\s*)(?:(?<!\*)\*(?!\*)|[+-])([^\s\*])").unwrap();
12    static ref ORDERED_LIST_NO_SPACE_PATTERN: Regex = Regex::new(r"^(\s*)(\d+\.)([^\s])").unwrap();
13
14    // Patterns for lists with multiple spaces
15    static ref UNORDERED_LIST_MULTIPLE_SPACE_PATTERN: Regex = Regex::new(r"^(\s*)([*+-])(\s{2,})").unwrap();
16    static ref ORDERED_LIST_MULTIPLE_SPACE_PATTERN: Regex = Regex::new(r"^(\s*)(\d+\.)(\s{2,})").unwrap();
17
18    // Regex to capture list markers and the spaces *after* them
19    pub static ref LIST_REGEX: Regex = Regex::new(r"^(\s*)([-*+]|\d+\.)(\s*)").unwrap();
20}
21
/// The kind of marker that introduces a list item.
#[derive(Debug, Clone, PartialEq)]
pub enum ListMarkerType {
    /// Unordered item introduced by `*`.
    Asterisk,
    /// Unordered item introduced by `+`.
    Plus,
    /// Unordered item introduced by `-`.
    Minus,
    /// Ordered item introduced by digits followed by `.` (for example `1.`).
    Ordered,
}
30
31/// Struct representing a list item
32#[derive(Debug, Clone)]
33pub struct ListItem {
34    pub indentation: usize,
35    pub marker_type: ListMarkerType,
36    pub marker: String,
37    pub content: String,
38    pub spaces_after_marker: usize,
39}
40
/// Namespace for the Markdown list helpers (detection, parsing and spacing fixes).
///
/// The type carries no data; every helper is an associated function.
pub struct ListUtils;
43
44impl ListUtils {
45    /// Calculate indentation level, counting tabs as 4 spaces per CommonMark spec
46    pub fn calculate_indentation(s: &str) -> usize {
47        s.chars()
48            .take_while(|c| c.is_whitespace())
49            .map(|c| if c == '\t' { 4 } else { 1 })
50            .sum()
51    }
52
53    /// Check if a line is a list item
54    pub fn is_list_item(line: &str) -> bool {
55        // Fast path for common cases
56        if line.is_empty() {
57            return false;
58        }
59
60        let trimmed = line.trim_start();
61        if trimmed.is_empty() {
62            return false;
63        }
64
65        // Quick literal check for common list markers
66        let Some(first_char) = trimmed.chars().next() else {
67            return false;
68        };
69        match first_char {
70            '*' | '+' | '-' => {
71                if trimmed.len() > 1 {
72                    let mut chars = trimmed.chars();
73                    chars.next(); // Skip first char
74                    if let Some(second_char) = chars.next() {
75                        return second_char.is_whitespace();
76                    }
77                }
78                false
79            }
80            '0'..='9' => {
81                // Check for ordered list pattern using a literal search first
82                let dot_pos = trimmed.find('.');
83                if let Some(pos) = dot_pos
84                    && pos > 0
85                    && pos < trimmed.len() - 1
86                {
87                    let after_dot = &trimmed[pos + 1..];
88                    return after_dot.starts_with(' ');
89                }
90                false
91            }
92            _ => false,
93        }
94    }
95
96    /// Check if a line is an unordered list item
97    pub fn is_unordered_list_item(line: &str) -> bool {
98        // Fast path for common cases
99        if line.is_empty() {
100            return false;
101        }
102
103        let trimmed = line.trim_start();
104        if trimmed.is_empty() {
105            return false;
106        }
107
108        // Quick literal check for unordered list markers
109        let Some(first_char) = trimmed.chars().next() else {
110            return false;
111        };
112        if (first_char == '*' || first_char == '+' || first_char == '-')
113            && trimmed.len() > 1
114            && let Some(second_char) = trimmed.chars().nth(1)
115        {
116            return second_char.is_whitespace();
117        }
118
119        false
120    }
121
122    /// Check if a line is an ordered list item
123    pub fn is_ordered_list_item(line: &str) -> bool {
124        // Fast path for common cases
125        if line.is_empty() {
126            return false;
127        }
128
129        let trimmed = line.trim_start();
130        if trimmed.is_empty() {
131            return false;
132        }
133
134        let Some(first_char) = trimmed.chars().next() else {
135            return false;
136        };
137
138        if !first_char.is_ascii_digit() {
139            return false;
140        }
141
142        // Check for ordered list pattern using a literal search
143        let dot_pos = trimmed.find('.');
144        if let Some(pos) = dot_pos
145            && pos > 0
146            && pos < trimmed.len() - 1
147        {
148            let after_dot = &trimmed[pos + 1..];
149            return after_dot.starts_with(' ');
150        }
151
152        false
153    }
154
155    /// Check if a line is a list item without proper spacing after the marker
156    pub fn is_list_item_without_space(line: &str) -> bool {
157        // Skip lines that start with double asterisks (bold text)
158        if line.trim_start().starts_with("**") {
159            return false;
160        }
161
162        // Skip lines that have bold/emphasis markers (typically table cells with bold text)
163        if line.trim_start().contains("**") || line.trim_start().contains("__") {
164            return false;
165        }
166
167        // Skip lines that are part of a Markdown table (contain |)
168        if line.contains('|') {
169            return false;
170        }
171
172        // Skip lines that are horizontal rules or table delimiter rows
173        let trimmed = line.trim();
174        if !trimmed.is_empty() {
175            // Check for horizontal rules (only dashes and whitespace)
176            if trimmed.chars().all(|c| c == '-' || c.is_whitespace()) {
177                return false;
178            }
179
180            // Check for table delimiter rows without pipes (e.g., in cases where pipes are optional)
181            // These have dashes and possibly colons for alignment
182            if trimmed.contains('-') && trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) {
183                return false;
184            }
185        }
186
187        // Skip lines that are part of emphasis/bold text
188        if line.trim_start().matches('*').count() >= 2 {
189            return false;
190        }
191
192        // Handle potential regex errors gracefully
193        UNORDERED_LIST_NO_SPACE_PATTERN.is_match(line).unwrap_or(false) || ORDERED_LIST_NO_SPACE_PATTERN.is_match(line)
194    }
195
196    /// Check if a line is a list item with multiple spaces after the marker
197    pub fn is_list_item_with_multiple_spaces(line: &str) -> bool {
198        UNORDERED_LIST_MULTIPLE_SPACE_PATTERN.is_match(line) || ORDERED_LIST_MULTIPLE_SPACE_PATTERN.is_match(line)
199    }
200
201    /// Parse a line as a list item
202    pub fn parse_list_item(line: &str) -> Option<ListItem> {
203        // First try to match unordered list pattern
204        if let Some(captures) = UNORDERED_LIST_PATTERN.captures(line) {
205            let indentation = captures.get(1).map_or(0, |m| Self::calculate_indentation(m.as_str()));
206            let marker = captures.get(2).unwrap().as_str();
207            let spaces = captures.get(3).map_or(0, |m| m.as_str().len());
208            let raw_indentation = captures.get(1).map_or(0, |m| m.as_str().len());
209            let content_start = raw_indentation + marker.len() + spaces;
210            let content = if content_start < line.len() {
211                line[content_start..].to_string()
212            } else {
213                String::new()
214            };
215
216            let marker_type = match marker {
217                "*" => ListMarkerType::Asterisk,
218                "+" => ListMarkerType::Plus,
219                "-" => ListMarkerType::Minus,
220                _ => unreachable!(), // Regex ensures this
221            };
222
223            return Some(ListItem {
224                indentation,
225                marker_type,
226                marker: marker.to_string(),
227                content,
228                spaces_after_marker: spaces,
229            });
230        }
231
232        // Then try to match ordered list pattern
233        if let Some(captures) = ORDERED_LIST_PATTERN.captures(line) {
234            let indentation = captures.get(1).map_or(0, |m| Self::calculate_indentation(m.as_str()));
235            let marker = captures.get(2).unwrap().as_str();
236            let spaces = captures.get(3).map_or(0, |m| m.as_str().len());
237            let raw_indentation = captures.get(1).map_or(0, |m| m.as_str().len());
238            let content_start = raw_indentation + marker.len() + spaces;
239            let content = if content_start < line.len() {
240                line[content_start..].to_string()
241            } else {
242                String::new()
243            };
244
245            return Some(ListItem {
246                indentation,
247                marker_type: ListMarkerType::Ordered,
248                marker: marker.to_string(),
249                content,
250                spaces_after_marker: spaces,
251            });
252        }
253
254        None
255    }
256
257    /// Check if a line is a continuation of a list item
258    pub fn is_list_continuation(line: &str, prev_list_item: &ListItem) -> bool {
259        if line.trim().is_empty() {
260            return false;
261        }
262
263        // Calculate indentation level properly (tabs = 4 spaces)
264        let indentation = Self::calculate_indentation(line);
265
266        // Continuation should be indented at least as much as the content of the previous item
267        let min_indent = prev_list_item.indentation + prev_list_item.marker.len() + prev_list_item.spaces_after_marker;
268        indentation >= min_indent && !Self::is_list_item(line)
269    }
270
271    /// Fix a list item without proper spacing
272    pub fn fix_list_item_without_space(line: &str) -> String {
273        // Handle unordered list items
274        if let Ok(Some(captures)) = UNORDERED_LIST_NO_SPACE_PATTERN.captures(line) {
275            let indentation = captures.get(1).map_or("", |m| m.as_str());
276            let marker = captures.get(2).map_or("", |m| m.as_str());
277            let content = captures.get(3).map_or("", |m| m.as_str());
278            return format!("{indentation}{marker} {content}");
279        }
280
281        // Handle ordered list items
282        if let Some(captures) = ORDERED_LIST_NO_SPACE_PATTERN.captures(line) {
283            let indentation = captures.get(1).map_or("", |m| m.as_str());
284            let marker = captures.get(2).map_or("", |m| m.as_str());
285            let content = captures.get(3).map_or("", |m| m.as_str());
286            return format!("{indentation}{marker} {content}");
287        }
288
289        line.to_string()
290    }
291
292    /// Fix a list item with multiple spaces after the marker
293    pub fn fix_list_item_with_multiple_spaces(line: &str) -> String {
294        if let Some(captures) = UNORDERED_LIST_MULTIPLE_SPACE_PATTERN.captures(line) {
295            let leading_space = captures.get(1).map_or("", |m| m.as_str());
296            let marker = captures.get(2).map_or("", |m| m.as_str());
297            let spaces = captures.get(3).map_or("", |m| m.as_str());
298
299            // Get content after multiple spaces
300            let start_pos = leading_space.len() + marker.len() + spaces.len();
301            let content = if start_pos < line.len() { &line[start_pos..] } else { "" };
302
303            // Replace multiple spaces with a single space
304            return format!("{leading_space}{marker} {content}");
305        }
306
307        if let Some(captures) = ORDERED_LIST_MULTIPLE_SPACE_PATTERN.captures(line) {
308            let leading_space = captures.get(1).map_or("", |m| m.as_str());
309            let marker = captures.get(2).map_or("", |m| m.as_str());
310            let spaces = captures.get(3).map_or("", |m| m.as_str());
311
312            // Get content after multiple spaces
313            let start_pos = leading_space.len() + marker.len() + spaces.len();
314            let content = if start_pos < line.len() { &line[start_pos..] } else { "" };
315
316            // Replace multiple spaces with a single space
317            return format!("{leading_space}{marker} {content}");
318        }
319
320        // Return the original line if no pattern matched
321        line.to_string()
322    }
323}
324
/// Coarse classification of a list item: bullet list or numbered list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListType {
    /// Item introduced by `-`, `*` or `+`.
    Unordered,
    /// Item introduced by digits followed by `.`.
    Ordered,
}
330
331/// Returns (ListType, matched string, number of spaces after marker) if the line is a list item
332pub fn is_list_item(line: &str) -> Option<(ListType, String, usize)> {
333    let trimmed_line = line.trim();
334    if trimmed_line.is_empty() {
335        return None;
336    }
337    // Horizontal rule check (--- or ***)
338    if trimmed_line.chars().all(|c| c == '-' || c == ' ') && trimmed_line.chars().filter(|&c| c == '-').count() >= 3 {
339        return None;
340    }
341    if trimmed_line.chars().all(|c| c == '*' || c == ' ') && trimmed_line.chars().filter(|&c| c == '*').count() >= 3 {
342        return None;
343    }
344    if let Some(cap) = LIST_REGEX.captures(line) {
345        let marker = &cap[2];
346        let spaces = cap[3].len();
347        let list_type = if marker.chars().next().is_some_and(|c| c.is_ascii_digit()) {
348            ListType::Ordered
349        } else {
350            ListType::Unordered
351        };
352        return Some((list_type, cap[0].to_string(), spaces));
353    }
354    None
355}
356
357/// Returns true if the list item at lines[current_idx] is a multi-line item
358pub fn is_multi_line_item(lines: &[&str], current_idx: usize) -> bool {
359    if current_idx >= lines.len() - 1 {
360        return false;
361    }
362    let next_line = lines[current_idx + 1].trim();
363    if next_line.is_empty() {
364        return false;
365    }
366    if is_list_item(next_line).is_some() {
367        return false;
368    }
369    let curr_indent = ListUtils::calculate_indentation(lines[current_idx]);
370    let next_indent = ListUtils::calculate_indentation(lines[current_idx + 1]);
371    next_indent > curr_indent
372}
373
/// Unit tests for the list detection, parsing and fixing helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_list_item_without_space() {
        // Valid list item with space after marker
        assert!(!ListUtils::is_list_item_without_space("- Item with space"));
        assert!(!ListUtils::is_list_item_without_space("* Item with space"));
        assert!(!ListUtils::is_list_item_without_space("+ Item with space"));
        assert!(!ListUtils::is_list_item_without_space("1. Item with space"));

        // Invalid list items without space after marker (should return true)
        assert!(ListUtils::is_list_item_without_space("-No space"));
        assert!(ListUtils::is_list_item_without_space("*No space"));
        assert!(ListUtils::is_list_item_without_space("+No space"));
        assert!(ListUtils::is_list_item_without_space("1.No space"));

        // Not list items (should return false)
        assert!(!ListUtils::is_list_item_without_space("Regular text"));
        assert!(!ListUtils::is_list_item_without_space(""));
        assert!(!ListUtils::is_list_item_without_space("    "));
        assert!(!ListUtils::is_list_item_without_space("# Heading"));

        // Bold/emphasis text that might be confused with list items (should return false)
        assert!(!ListUtils::is_list_item_without_space("**Bold text**"));
        assert!(!ListUtils::is_list_item_without_space("__Bold text__"));
        assert!(!ListUtils::is_list_item_without_space("*Italic text*"));
        assert!(!ListUtils::is_list_item_without_space("_Italic text_"));

        // Table cells with bold/emphasis (should return false)
        assert!(!ListUtils::is_list_item_without_space("| **Heading** | Content |"));
        assert!(!ListUtils::is_list_item_without_space("**Bold** | Normal"));
        assert!(!ListUtils::is_list_item_without_space("| Cell 1 | **Bold** |"));

        // Horizontal rules (should return false)
        assert!(!ListUtils::is_list_item_without_space("---"));
        assert!(!ListUtils::is_list_item_without_space("----------"));
        assert!(!ListUtils::is_list_item_without_space("   ---   "));

        // Table delimiter rows (should return false)
        assert!(!ListUtils::is_list_item_without_space("|--------|---------|"));
        assert!(!ListUtils::is_list_item_without_space("|:-------|:-------:|"));
        assert!(!ListUtils::is_list_item_without_space("| ------ | ------- |"));
        assert!(!ListUtils::is_list_item_without_space("---------|----------|"));
        assert!(!ListUtils::is_list_item_without_space(":--------|:--------:"));
    }

    #[test]
    fn test_is_list_item() {
        // Valid list items
        assert!(ListUtils::is_list_item("- Item"));
        assert!(ListUtils::is_list_item("* Item"));
        assert!(ListUtils::is_list_item("+ Item"));
        assert!(ListUtils::is_list_item("1. Item"));
        assert!(ListUtils::is_list_item("  - Indented item"));

        // Not list items
        assert!(!ListUtils::is_list_item("Regular text"));
        assert!(!ListUtils::is_list_item(""));
        assert!(!ListUtils::is_list_item("    "));
        assert!(!ListUtils::is_list_item("# Heading"));
        assert!(!ListUtils::is_list_item("**Bold text**"));
        assert!(!ListUtils::is_list_item("| Cell 1 | Cell 2 |"));
    }

    #[test]
    fn test_complex_nested_lists() {
        // Various indentation levels
        assert!(ListUtils::is_list_item("- Level 1"));
        assert!(ListUtils::is_list_item("  - Level 2"));
        assert!(ListUtils::is_list_item("    - Level 3"));
        assert!(ListUtils::is_list_item("      - Level 4"));
        assert!(ListUtils::is_list_item("        - Level 5"));

        // Mixed markers in nested lists
        assert!(ListUtils::is_list_item("* Main item"));
        assert!(ListUtils::is_list_item("  - Sub item"));
        assert!(ListUtils::is_list_item("    + Sub-sub item"));
        assert!(ListUtils::is_list_item("      * Deep item"));

        // Ordered lists nested in unordered
        assert!(ListUtils::is_list_item("- Unordered"));
        assert!(ListUtils::is_list_item("  1. First ordered"));
        assert!(ListUtils::is_list_item("  2. Second ordered"));
        assert!(ListUtils::is_list_item("    - Back to unordered"));

        // Tab indentation
        assert!(ListUtils::is_list_item("\t- Tab indented"));
        assert!(ListUtils::is_list_item("\t\t- Double tab"));
        assert!(ListUtils::is_list_item("\t  - Tab plus spaces"));
        assert!(ListUtils::is_list_item("  \t- Spaces plus tab"));
    }

    #[test]
    fn test_parse_list_item_edge_cases() {
        // Unicode content
        let unicode_item = ListUtils::parse_list_item("- 测试项目 🚀").unwrap();
        assert_eq!(unicode_item.content, "测试项目 🚀");

        // Empty content after marker
        let empty_item = ListUtils::parse_list_item("- ").unwrap();
        assert_eq!(empty_item.content, "");

        // Multiple spaces after marker
        let multi_space = ListUtils::parse_list_item("-   Multiple spaces").unwrap();
        assert_eq!(multi_space.spaces_after_marker, 3);
        assert_eq!(multi_space.content, "Multiple spaces");

        // Very long ordered list numbers
        let long_number = ListUtils::parse_list_item("999999. Item").unwrap();
        assert_eq!(long_number.marker, "999999.");
        assert_eq!(long_number.marker_type, ListMarkerType::Ordered);

        // A bare marker is not parsed: the patterns require whitespace after the marker
        assert!(ListUtils::parse_list_item("*").is_none());
    }

    #[test]
    fn test_nested_list_detection() {
        // Test detection of list items at various nesting levels
        let lines = vec![
            ("- Item 1", 0),
            ("  - Item 1.1", 2),
            ("    - Item 1.1.1", 4),
            ("      - Item 1.1.1.1", 6),
            ("    - Item 1.1.2", 4),
            ("  - Item 1.2", 2),
            ("- Item 2", 0),
        ];

        for (line, expected_indent) in lines {
            let item = ListUtils::parse_list_item(line).unwrap();
            assert_eq!(item.indentation, expected_indent, "Failed for line: {line}");
        }
    }

    #[test]
    fn test_mixed_list_markers() {
        // Test different marker types
        let markers = vec![
            ("* Asterisk", ListMarkerType::Asterisk),
            ("+ Plus", ListMarkerType::Plus),
            ("- Minus", ListMarkerType::Minus),
            ("1. Ordered", ListMarkerType::Ordered),
            ("42. Ordered", ListMarkerType::Ordered),
        ];

        for (line, expected_type) in markers {
            let item = ListUtils::parse_list_item(line).unwrap();
            assert_eq!(item.marker_type, expected_type, "Failed for line: {line}");
        }
    }

    #[test]
    fn test_list_item_without_space_edge_cases() {
        // Edge cases for missing spaces
        assert!(ListUtils::is_list_item_without_space("*a"));
        assert!(ListUtils::is_list_item_without_space("+b"));
        assert!(ListUtils::is_list_item_without_space("-c"));
        assert!(ListUtils::is_list_item_without_space("1.d"));

        // Single character lines
        assert!(!ListUtils::is_list_item_without_space("*"));
        assert!(!ListUtils::is_list_item_without_space("+"));
        assert!(!ListUtils::is_list_item_without_space("-"));

        // Markers at end of line
        assert!(!ListUtils::is_list_item_without_space("Text ends with -"));
        assert!(!ListUtils::is_list_item_without_space("Text ends with *"));
        assert!(!ListUtils::is_list_item_without_space("Number ends with 1."));
    }

    #[test]
    fn test_list_item_with_multiple_spaces() {
        // Test detection of multiple spaces after marker
        assert!(ListUtils::is_list_item_with_multiple_spaces("-  Two spaces"));
        assert!(ListUtils::is_list_item_with_multiple_spaces("*   Three spaces"));
        assert!(ListUtils::is_list_item_with_multiple_spaces("+    Four spaces"));
        assert!(ListUtils::is_list_item_with_multiple_spaces("1.  Two spaces"));

        // Should not match single space
        assert!(!ListUtils::is_list_item_with_multiple_spaces("- One space"));
        assert!(!ListUtils::is_list_item_with_multiple_spaces("* One space"));
        assert!(!ListUtils::is_list_item_with_multiple_spaces("+ One space"));
        assert!(!ListUtils::is_list_item_with_multiple_spaces("1. One space"));
    }

    #[test]
    fn test_complex_content_in_lists() {
        // List items with inline formatting
        let bold_item = ListUtils::parse_list_item("- **Bold** content").unwrap();
        assert_eq!(bold_item.content, "**Bold** content");

        let link_item = ListUtils::parse_list_item("* [Link](url) in list").unwrap();
        assert_eq!(link_item.content, "[Link](url) in list");

        let code_item = ListUtils::parse_list_item("+ Item with `code`").unwrap();
        assert_eq!(code_item.content, "Item with `code`");

        // List with inline HTML
        let html_item = ListUtils::parse_list_item("- Item with <span>HTML</span>").unwrap();
        assert_eq!(html_item.content, "Item with <span>HTML</span>");

        // List with emoji
        let emoji_item = ListUtils::parse_list_item("1. 🎉 Party time!").unwrap();
        assert_eq!(emoji_item.content, "🎉 Party time!");
    }

    #[test]
    fn test_ambiguous_list_markers() {
        // Test cases that might be ambiguous

        // Arithmetic expressions should not be lists
        assert!(!ListUtils::is_list_item("2 + 2 = 4"));
        assert!(!ListUtils::is_list_item("5 - 3 = 2"));
        assert!(!ListUtils::is_list_item("3 * 3 = 9"));

        // Emphasis markers should not be lists
        assert!(!ListUtils::is_list_item("*emphasis*"));
        assert!(!ListUtils::is_list_item("**strong**"));
        assert!(!ListUtils::is_list_item("***strong emphasis***"));

        // Date ranges
        assert!(!ListUtils::is_list_item("2023-01-01 - 2023-12-31"));

        // But these should be lists
        assert!(ListUtils::is_list_item("- 2023-01-01 - 2023-12-31"));
        assert!(ListUtils::is_list_item("* emphasis text here"));
    }

    #[test]
    fn test_deeply_nested_complex_lists() {
        let complex_doc = vec![
            "- Top level item",
            "  - Second level with **bold**",
            "    1. Ordered item with `code`",
            "    2. Another ordered item",
            "      - Back to unordered [link](url)",
            "        * Different marker",
            "          + Yet another marker",
            "            - Maximum nesting?",
            "              1. Can we go deeper?",
            "                - Apparently yes!",
        ];

        for line in complex_doc {
            assert!(ListUtils::is_list_item(line), "Failed to recognize: {line}");
            let item = ListUtils::parse_list_item(line).unwrap();
            assert!(
                !item.content.is_empty()
                    || line.trim().ends_with('-')
                    || line.trim().ends_with('*')
                    || line.trim().ends_with('+')
            );
        }
    }

    #[test]
    fn test_parse_list_item_comprehensive() {
        // Test the comprehensive parsing with expected values
        let test_cases = vec![
            ("- Simple item", 0, ListMarkerType::Minus, "-", "Simple item"),
            ("  * Indented", 2, ListMarkerType::Asterisk, "*", "Indented"),
            ("    1. Ordered", 4, ListMarkerType::Ordered, "1.", "Ordered"),
            ("\t+ Tab indent", 4, ListMarkerType::Plus, "+", "Tab indent"), // Tab counts as 4 spaces per CommonMark
        ];

        for (line, expected_indent, expected_type, expected_marker, expected_content) in test_cases {
            let item = ListUtils::parse_list_item(line);
            assert!(item.is_some(), "Failed to parse: {line}");
            let item = item.unwrap();
            assert_eq!(item.indentation, expected_indent, "Wrong indentation for: {line}");
            assert_eq!(item.marker_type, expected_type, "Wrong marker type for: {line}");
            assert_eq!(item.marker, expected_marker, "Wrong marker for: {line}");
            assert_eq!(item.content, expected_content, "Wrong content for: {line}");
        }
    }

    #[test]
    fn test_special_characters_in_lists() {
        // Test with special characters that might break regex
        let special_cases = vec![
            "- Item with $ dollar sign",
            "* Item with ^ caret",
            "+ Item with \\ backslash",
            "- Item with | pipe",
            "1. Item with ( ) parentheses",
            "2. Item with [ ] brackets",
            "3. Item with { } braces",
        ];

        for line in special_cases {
            assert!(ListUtils::is_list_item(line), "Failed for: {line}");
            let item = ListUtils::parse_list_item(line);
            assert!(item.is_some(), "Failed to parse: {line}");
        }
    }

    #[test]
    fn test_list_continuations() {
        // Long list items. The trailing `\` in each literal joins the source lines,
        // so every input below is a single line with no embedded newline.
        let continuation = "- This is a very long list item that \
                           continues on the next line";
        assert!(ListUtils::is_list_item(continuation));

        // Indented long item
        let indented_cont = "  - Another long item that \
                               continues with proper indentation";
        assert!(ListUtils::is_list_item(indented_cont));
    }

    #[test]
    fn test_performance_edge_cases() {
        // Very long lines
        let long_content = "x".repeat(10000);
        let long_line = format!("- {long_content}");
        assert!(ListUtils::is_list_item(&long_line));

        // Many spaces
        let many_spaces = " ".repeat(100);
        let spaced_line = format!("{many_spaces}- Item");
        assert!(ListUtils::is_list_item(&spaced_line));

        // Large ordered number
        let big_number = format!("{}. Item", "9".repeat(20));
        assert!(ListUtils::is_list_item(&big_number));
    }

    #[test]
    fn test_is_unordered_list_item() {
        // Valid unordered list items
        assert!(ListUtils::is_unordered_list_item("- Item"));
        assert!(ListUtils::is_unordered_list_item("* Item"));
        assert!(ListUtils::is_unordered_list_item("+ Item"));

        // Invalid - ordered lists
        assert!(!ListUtils::is_unordered_list_item("1. Item"));
        assert!(!ListUtils::is_unordered_list_item("99. Item"));

        // Invalid - no space after marker
        assert!(!ListUtils::is_unordered_list_item("-Item"));
        assert!(!ListUtils::is_unordered_list_item("*Item"));
        assert!(!ListUtils::is_unordered_list_item("+Item"));
    }

    #[test]
    fn test_calculate_indentation() {
        // Test that tabs are counted as 4 spaces
        assert_eq!(ListUtils::calculate_indentation(""), 0);
        assert_eq!(ListUtils::calculate_indentation("    "), 4);
        assert_eq!(ListUtils::calculate_indentation("\t"), 4);
        assert_eq!(ListUtils::calculate_indentation("\t\t"), 8);
        assert_eq!(ListUtils::calculate_indentation("  \t"), 6); // 2 spaces + 1 tab
        assert_eq!(ListUtils::calculate_indentation("\t  "), 6); // 1 tab + 2 spaces
        assert_eq!(ListUtils::calculate_indentation("\t\t  "), 10); // 2 tabs + 2 spaces
        assert_eq!(ListUtils::calculate_indentation("  \t  \t"), 12); // 2 spaces + tab + 2 spaces + tab
    }

    #[test]
    fn test_is_ordered_list_item() {
        // Valid ordered list items
        assert!(ListUtils::is_ordered_list_item("1. Item"));
        assert!(ListUtils::is_ordered_list_item("99. Item"));
        assert!(ListUtils::is_ordered_list_item("1234567890. Item"));

        // Invalid - unordered lists
        assert!(!ListUtils::is_ordered_list_item("- Item"));
        assert!(!ListUtils::is_ordered_list_item("* Item"));
        assert!(!ListUtils::is_ordered_list_item("+ Item"));

        // Invalid - no space after period
        assert!(!ListUtils::is_ordered_list_item("1.Item"));
        assert!(!ListUtils::is_ordered_list_item("99.Item"));
    }
}