rumdl_lib/rules/
md029_ordered_list_prefix.rs

1/// Rule MD029: Ordered list item prefix
2///
3/// See [docs/md029.md](../../docs/md029.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::regex_cache::ORDERED_LIST_MARKER_REGEX;
7use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
8use std::collections::HashMap;
9use toml;
10
11mod md029_config;
12pub use md029_config::{ListStyle, MD029Config};
13
14/// Type alias for grouped list items: (list_id, items) where items are (line_num, LineInfo, ListItemInfo)
15type ListItemGroup<'a> = (
16    usize,
17    Vec<(
18        usize,
19        &'a crate::lint_context::LineInfo,
20        &'a crate::lint_context::ListItemInfo,
21    )>,
22);
23
24#[derive(Debug, Clone, Default)]
25pub struct MD029OrderedListPrefix {
26    config: MD029Config,
27}
28
29impl MD029OrderedListPrefix {
30    pub fn new(style: ListStyle) -> Self {
31        Self {
32            config: MD029Config { style },
33        }
34    }
35
36    pub fn from_config_struct(config: MD029Config) -> Self {
37        Self { config }
38    }
39
40    #[inline]
41    fn parse_marker_number(marker: &str) -> Option<usize> {
42        // Handle marker format like "1." or "1"
43        let num_part = if let Some(stripped) = marker.strip_suffix('.') {
44            stripped
45        } else {
46            marker
47        };
48        num_part.parse::<usize>().ok()
49    }
50
51    /// Calculate the expected number for a list item.
52    /// The `start_value` is the CommonMark-provided start value for the list.
53    /// For style `Ordered`, items should be `start_value, start_value+1, start_value+2, ...`
54    #[inline]
55    fn get_expected_number(&self, index: usize, detected_style: Option<ListStyle>, start_value: u64) -> usize {
56        // Use detected_style when the configuration is auto-detect mode (OneOrOrdered or Consistent)
57        // For explicit style configurations, always use the configured style
58        let style = match self.config.style {
59            ListStyle::OneOrOrdered | ListStyle::Consistent => detected_style.unwrap_or(ListStyle::OneOne),
60            _ => self.config.style.clone(),
61        };
62
63        match style {
64            ListStyle::One | ListStyle::OneOne => 1,
65            ListStyle::Ordered => (start_value as usize) + index,
66            ListStyle::Ordered0 => index,
67            ListStyle::OneOrOrdered | ListStyle::Consistent => {
68                // This shouldn't be reached since we handle these above
69                1
70            }
71        }
72    }
73
74    /// Detect the style being used in a list by checking all items for prevalence.
75    /// The `start_value` parameter is the CommonMark-provided list start value.
76    fn detect_list_style(
77        items: &[(
78            usize,
79            &crate::lint_context::LineInfo,
80            &crate::lint_context::ListItemInfo,
81        )],
82        start_value: u64,
83    ) -> ListStyle {
84        if items.len() < 2 {
85            // With only one item, check if it matches the start value
86            // If so, treat as Ordered (respects CommonMark start value)
87            // Otherwise, check if it's 1 (OneOne style)
88            let first_num = Self::parse_marker_number(&items[0].2.marker);
89            if first_num == Some(start_value as usize) {
90                return ListStyle::Ordered;
91            }
92            return ListStyle::OneOne;
93        }
94
95        let first_num = Self::parse_marker_number(&items[0].2.marker);
96        let second_num = Self::parse_marker_number(&items[1].2.marker);
97
98        // Fast path: Check for Ordered0 special case (starts with 0, 1)
99        if matches!((first_num, second_num), (Some(0), Some(1))) {
100            return ListStyle::Ordered0;
101        }
102
103        // Fast path: If first 2 items aren't both "1", it must be Ordered (O(1))
104        // This handles ~95% of lists instantly: "1. 2. 3...", "2. 3. 4...", etc.
105        if first_num != Some(1) || second_num != Some(1) {
106            return ListStyle::Ordered;
107        }
108
109        // Slow path: Both first items are "1", check if ALL are "1" (O(n))
110        // This is necessary for lists like "1. 1. 1..." vs "1. 1. 2. 3..."
111        let all_ones = items
112            .iter()
113            .all(|(_, _, item)| Self::parse_marker_number(&item.marker) == Some(1));
114
115        if all_ones {
116            ListStyle::OneOne
117        } else {
118            ListStyle::Ordered
119        }
120    }
121
122    /// Build maps from line number to list ID and list ID to start value using pulldown-cmark's AST.
123    /// This is the authoritative source of truth for list membership and respects CommonMark's
124    /// list start values (e.g., a list that starts at 11 is valid if items are 11, 12, 13...).
125    fn build_commonmark_list_membership(
126        content: &str,
127    ) -> (
128        std::collections::HashMap<usize, usize>,
129        std::collections::HashMap<usize, u64>,
130    ) {
131        let mut line_to_list: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
132        let mut list_start_values: std::collections::HashMap<usize, u64> = std::collections::HashMap::new();
133
134        // Pre-compute line start offsets for byte-to-line conversion
135        let line_starts: Vec<usize> = std::iter::once(0)
136            .chain(content.match_indices('\n').map(|(i, _)| i + 1))
137            .collect();
138
139        let byte_to_line = |byte_offset: usize| -> usize {
140            line_starts
141                .iter()
142                .rposition(|&start| start <= byte_offset)
143                .map(|i| i + 1) // 1-indexed
144                .unwrap_or(1)
145        };
146
147        let options = Options::empty();
148        let parser = Parser::new_ext(content, options);
149
150        let mut list_stack: Vec<(usize, bool, u64)> = Vec::new(); // (list_id, is_ordered, start_value)
151        let mut next_list_id = 0;
152
153        for (event, range) in parser.into_offset_iter() {
154            match event {
155                Event::Start(Tag::List(start_num)) => {
156                    let is_ordered = start_num.is_some();
157                    let start_value = start_num.unwrap_or(1);
158                    list_stack.push((next_list_id, is_ordered, start_value));
159                    if is_ordered {
160                        list_start_values.insert(next_list_id, start_value);
161                    }
162                    next_list_id += 1;
163                }
164                Event::End(TagEnd::List(_)) => {
165                    list_stack.pop();
166                }
167                Event::Start(Tag::Item) => {
168                    // Record the line number of this item and its list ID
169                    if let Some(&(list_id, is_ordered, _)) = list_stack.last()
170                        && is_ordered
171                    {
172                        let line_num = byte_to_line(range.start);
173                        line_to_list.insert(line_num, list_id);
174                    }
175                }
176                _ => {}
177            }
178        }
179
180        (line_to_list, list_start_values)
181    }
182
183    /// Group ordered items by their CommonMark list membership.
184    /// Returns (list_id, items) tuples for each distinct list, where items are (line_num, LineInfo, ListItemInfo).
185    fn group_items_by_commonmark_list<'a>(
186        ctx: &'a crate::lint_context::LintContext,
187        line_to_list: &std::collections::HashMap<usize, usize>,
188    ) -> Vec<ListItemGroup<'a>> {
189        // Collect all ordered items with their list IDs
190        let mut items_with_list_id: Vec<(
191            usize,
192            usize,
193            &crate::lint_context::LineInfo,
194            &crate::lint_context::ListItemInfo,
195        )> = Vec::new();
196
197        for line_num in 1..=ctx.lines.len() {
198            if let Some(line_info) = ctx.line_info(line_num)
199                && let Some(list_item) = &line_info.list_item
200                && list_item.is_ordered
201            {
202                // Get the list ID from pulldown-cmark's grouping
203                if let Some(&list_id) = line_to_list.get(&line_num) {
204                    items_with_list_id.push((list_id, line_num, line_info, list_item));
205                }
206            }
207        }
208
209        // Group by list_id
210        let mut groups: std::collections::HashMap<
211            usize,
212            Vec<(
213                usize,
214                &crate::lint_context::LineInfo,
215                &crate::lint_context::ListItemInfo,
216            )>,
217        > = std::collections::HashMap::new();
218
219        for (list_id, line_num, line_info, list_item) in items_with_list_id {
220            groups
221                .entry(list_id)
222                .or_default()
223                .push((line_num, line_info, list_item));
224        }
225
226        // Convert to Vec of (list_id, items), sort each group by line number, and sort groups by first line
227        let mut result: Vec<_> = groups.into_iter().collect();
228        for (_, items) in &mut result {
229            items.sort_by_key(|(line_num, _, _)| *line_num);
230        }
231        // Sort groups by their first item's line number for deterministic output
232        result.sort_by_key(|(_, items)| items.first().map(|(ln, _, _)| *ln).unwrap_or(0));
233
234        result
235    }
236
237    /// Check a CommonMark-grouped list for correct ordering.
238    /// Uses the CommonMark start value to validate items (e.g., a list starting at 11
239    /// expects items 11, 12, 13... - no violation there).
240    fn check_commonmark_list_group(
241        &self,
242        _ctx: &crate::lint_context::LintContext,
243        group: &[(
244            usize,
245            &crate::lint_context::LineInfo,
246            &crate::lint_context::ListItemInfo,
247        )],
248        warnings: &mut Vec<LintWarning>,
249        document_wide_style: Option<ListStyle>,
250        start_value: u64,
251    ) {
252        if group.is_empty() {
253            return;
254        }
255
256        // Group items by indentation level (marker_column) to handle nested lists
257        type LevelGroups<'a> = HashMap<
258            usize,
259            Vec<(
260                usize,
261                &'a crate::lint_context::LineInfo,
262                &'a crate::lint_context::ListItemInfo,
263            )>,
264        >;
265        let mut level_groups: LevelGroups = HashMap::new();
266
267        for (line_num, line_info, list_item) in group {
268            level_groups
269                .entry(list_item.marker_column)
270                .or_default()
271                .push((*line_num, *line_info, *list_item));
272        }
273
274        // Process each indentation level in sorted order for deterministic output
275        let mut sorted_levels: Vec<_> = level_groups.into_iter().collect();
276        sorted_levels.sort_by_key(|(indent, _)| *indent);
277
278        for (_indent, mut items) in sorted_levels {
279            // Sort by line number
280            items.sort_by_key(|(line_num, _, _)| *line_num);
281
282            if items.is_empty() {
283                continue;
284            }
285
286            // Determine style for this group
287            let detected_style = if let Some(doc_style) = document_wide_style.clone() {
288                Some(doc_style)
289            } else if self.config.style == ListStyle::OneOrOrdered {
290                Some(Self::detect_list_style(&items, start_value))
291            } else {
292                None
293            };
294
295            // Check each item using the CommonMark start value
296            for (idx, (line_num, line_info, list_item)) in items.iter().enumerate() {
297                if let Some(actual_num) = Self::parse_marker_number(&list_item.marker) {
298                    let expected_num = self.get_expected_number(idx, detected_style.clone(), start_value);
299
300                    if actual_num != expected_num {
301                        let marker_start = line_info.byte_offset + list_item.marker_column;
302                        let number_len = if let Some(dot_pos) = list_item.marker.find('.') {
303                            dot_pos
304                        } else if let Some(paren_pos) = list_item.marker.find(')') {
305                            paren_pos
306                        } else {
307                            list_item.marker.len()
308                        };
309
310                        let style_name = match detected_style.as_ref().unwrap_or(&ListStyle::Ordered) {
311                            ListStyle::OneOne => "one",
312                            ListStyle::Ordered => "ordered",
313                            ListStyle::Ordered0 => "ordered0",
314                            _ => "ordered",
315                        };
316
317                        let style_context = match self.config.style {
318                            ListStyle::Consistent => format!("document style '{style_name}'"),
319                            ListStyle::OneOrOrdered => format!("list style '{style_name}'"),
320                            ListStyle::One | ListStyle::OneOne => "configured style 'one'".to_string(),
321                            ListStyle::Ordered => "configured style 'ordered'".to_string(),
322                            ListStyle::Ordered0 => "configured style 'ordered0'".to_string(),
323                        };
324
325                        // Only provide auto-fix when:
326                        // 1. The list starts at 1 (default numbering), OR
327                        // 2. We're using explicit 'one' style (numbers are meaningless)
328                        // When start_value > 1, the user explicitly chose that number,
329                        // so auto-fixing would destroy their intent.
330                        let should_provide_fix =
331                            start_value == 1 || matches!(self.config.style, ListStyle::One | ListStyle::OneOne);
332
333                        warnings.push(LintWarning {
334                            rule_name: Some(self.name().to_string()),
335                            message: format!(
336                                "Ordered list item number {actual_num} does not match {style_context} (expected {expected_num})"
337                            ),
338                            line: *line_num,
339                            column: list_item.marker_column + 1,
340                            end_line: *line_num,
341                            end_column: list_item.marker_column + number_len + 1,
342                            severity: Severity::Warning,
343                            fix: if should_provide_fix {
344                                Some(Fix {
345                                    range: marker_start..marker_start + number_len,
346                                    replacement: expected_num.to_string(),
347                                })
348                            } else {
349                                None
350                            },
351                        });
352                    }
353                }
354            }
355        }
356    }
357}
358
359impl Rule for MD029OrderedListPrefix {
360    fn name(&self) -> &'static str {
361        "MD029"
362    }
363
364    fn description(&self) -> &'static str {
365        "Ordered list marker value"
366    }
367
368    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
369        // Early returns for performance
370        if ctx.content.is_empty() {
371            return Ok(Vec::new());
372        }
373
374        // Quick check for any ordered list markers before processing
375        if !ctx.content.contains('.') || !ctx.content.lines().any(|line| ORDERED_LIST_MARKER_REGEX.is_match(line)) {
376            return Ok(Vec::new());
377        }
378
379        let mut warnings = Vec::new();
380
381        // Use pulldown-cmark's AST for authoritative list membership and start values.
382        // This respects CommonMark's list start values (e.g., a list starting at 11
383        // expects items 11, 12, 13... - no violation there).
384        let (line_to_list, list_start_values) = Self::build_commonmark_list_membership(ctx.content);
385        let list_groups = Self::group_items_by_commonmark_list(ctx, &line_to_list);
386
387        if list_groups.is_empty() {
388            return Ok(Vec::new());
389        }
390
391        // For Consistent style, detect document-wide prevalent style
392        let document_wide_style = if self.config.style == ListStyle::Consistent {
393            // Collect ALL ordered items from ALL groups
394            let mut all_document_items = Vec::new();
395            for (_, items) in &list_groups {
396                for (line_num, line_info, list_item) in items {
397                    all_document_items.push((*line_num, *line_info, *list_item));
398                }
399            }
400            // Detect style across entire document (use 1 as default for pattern detection)
401            if !all_document_items.is_empty() {
402                Some(Self::detect_list_style(&all_document_items, 1))
403            } else {
404                None
405            }
406        } else {
407            None
408        };
409
410        // Process each CommonMark-defined list group with its start value
411        for (list_id, items) in list_groups {
412            let start_value = list_start_values.get(&list_id).copied().unwrap_or(1);
413            self.check_commonmark_list_group(ctx, &items, &mut warnings, document_wide_style.clone(), start_value);
414        }
415
416        // Sort warnings by line number for deterministic output
417        warnings.sort_by_key(|w| (w.line, w.column));
418
419        Ok(warnings)
420    }
421
422    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
423        // Use the same logic as check() - just apply the fixes from warnings
424        let warnings = self.check(ctx)?;
425
426        if warnings.is_empty() {
427            // No changes needed
428            return Ok(ctx.content.to_string());
429        }
430
431        // Collect fixes and sort by position
432        let mut fixes: Vec<&Fix> = Vec::new();
433        for warning in &warnings {
434            if let Some(ref fix) = warning.fix {
435                fixes.push(fix);
436            }
437        }
438        fixes.sort_by_key(|f| f.range.start);
439
440        let mut result = String::new();
441        let mut last_pos = 0;
442        let content_bytes = ctx.content.as_bytes();
443
444        for fix in fixes {
445            // Add content before the fix
446            if last_pos < fix.range.start {
447                let chunk = &content_bytes[last_pos..fix.range.start];
448                result.push_str(
449                    std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
450                );
451            }
452            // Add the replacement
453            result.push_str(&fix.replacement);
454            last_pos = fix.range.end;
455        }
456
457        // Add remaining content
458        if last_pos < content_bytes.len() {
459            let chunk = &content_bytes[last_pos..];
460            result.push_str(
461                std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
462            );
463        }
464
465        Ok(result)
466    }
467
468    /// Get the category of this rule for selective processing
469    fn category(&self) -> RuleCategory {
470        RuleCategory::List
471    }
472
473    /// Check if this rule should be skipped
474    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
475        ctx.content.is_empty() || !ctx.likely_has_lists()
476    }
477
478    fn as_any(&self) -> &dyn std::any::Any {
479        self
480    }
481
482    fn default_config_section(&self) -> Option<(String, toml::Value)> {
483        let default_config = MD029Config::default();
484        let json_value = serde_json::to_value(&default_config).ok()?;
485        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
486        if let toml::Value::Table(table) = toml_value {
487            if !table.is_empty() {
488                Some((MD029Config::RULE_NAME.to_string(), toml::Value::Table(table)))
489            } else {
490                None
491            }
492        } else {
493            None
494        }
495    }
496
497    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
498    where
499        Self: Sized,
500    {
501        let rule_config = crate::rule_config_serde::load_rule_config::<MD029Config>(config);
502        Box::new(MD029OrderedListPrefix::from_config_struct(rule_config))
503    }
504}
505
506#[cfg(test)]
507mod tests {
508    use super::*;
509
510    #[test]
511    fn test_basic_functionality() {
512        // Test with default style (ordered)
513        let rule = MD029OrderedListPrefix::default();
514
515        // Test with correctly ordered list
516        let content = "1. First item\n2. Second item\n3. Third item";
517        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
518        let result = rule.check(&ctx).unwrap();
519        assert!(result.is_empty());
520
521        // Test with incorrectly ordered list
522        let content = "1. First item\n3. Third item\n5. Fifth item";
523        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
524        let result = rule.check(&ctx).unwrap();
525        assert_eq!(result.len(), 2); // Should have warnings for items 3 and 5
526
527        // Test with one-one style
528        let rule = MD029OrderedListPrefix::new(ListStyle::OneOne);
529        let content = "1. First item\n2. Second item\n3. Third item";
530        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
531        let result = rule.check(&ctx).unwrap();
532        assert_eq!(result.len(), 2); // Should have warnings for items 2 and 3
533
534        // Test with ordered0 style
535        let rule = MD029OrderedListPrefix::new(ListStyle::Ordered0);
536        let content = "0. First item\n1. Second item\n2. Third item";
537        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
538        let result = rule.check(&ctx).unwrap();
539        assert!(result.is_empty());
540    }
541
542    #[test]
543    fn test_redundant_computation_fix() {
544        // This test confirms that the redundant computation bug is fixed
545        // Previously: get_list_number() was called twice (once for is_some(), once for unwrap())
546        // Now: get_list_number() is called once with if let pattern
547
548        let rule = MD029OrderedListPrefix::default();
549
550        // Test with mixed valid and edge case content
551        let content = "1. First item\n3. Wrong number\n2. Another wrong number";
552        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
553
554        // This should not panic and should produce warnings for incorrect numbering
555        let result = rule.check(&ctx).unwrap();
556        assert_eq!(result.len(), 2); // Should have warnings for items 3 and 2
557
558        // Verify the warnings have correct content
559        assert!(result[0].message.contains("3") && result[0].message.contains("expected 2"));
560        assert!(result[1].message.contains("2") && result[1].message.contains("expected 3"));
561    }
562
563    #[test]
564    fn test_performance_improvement() {
565        // This test verifies the rule handles large lists without performance issues
566        let rule = MD029OrderedListPrefix::default();
567
568        // Create a larger list with WRONG numbers: 1, 5, 10, 15, ...
569        // Starting at 1, CommonMark expects 1, 2, 3, 4, ...
570        // So items 2-100 are all wrong (expected 2, got 5; expected 3, got 10; etc.)
571        let mut content = String::from("1. Item 1\n"); // First item correct
572        for i in 2..=100 {
573            content.push_str(&format!("{}. Item {}\n", i * 5 - 5, i)); // Wrong numbers
574        }
575
576        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
577
578        // This should complete without issues and produce warnings for items 2-100
579        let result = rule.check(&ctx).unwrap();
580        assert_eq!(result.len(), 99, "Should have warnings for items 2-100 (99 items)");
581
582        // First wrong item: "5. Item 2" (expected 2)
583        assert!(result[0].message.contains("5") && result[0].message.contains("expected 2"));
584    }
585
586    #[test]
587    fn test_one_or_ordered_with_all_ones() {
588        // Test OneOrOrdered style with all 1s (should pass)
589        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
590
591        let content = "1. First item\n1. Second item\n1. Third item";
592        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
593        let result = rule.check(&ctx).unwrap();
594        assert!(result.is_empty(), "All ones should be valid in OneOrOrdered mode");
595    }
596
597    #[test]
598    fn test_one_or_ordered_with_sequential() {
599        // Test OneOrOrdered style with sequential numbering (should pass)
600        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
601
602        let content = "1. First item\n2. Second item\n3. Third item";
603        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
604        let result = rule.check(&ctx).unwrap();
605        assert!(
606            result.is_empty(),
607            "Sequential numbering should be valid in OneOrOrdered mode"
608        );
609    }
610
611    #[test]
612    fn test_one_or_ordered_with_mixed_style() {
613        // Test OneOrOrdered style with mixed numbering (should fail)
614        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
615
616        let content = "1. First item\n2. Second item\n1. Third item";
617        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
618        let result = rule.check(&ctx).unwrap();
619        assert_eq!(result.len(), 1, "Mixed style should produce one warning");
620        assert!(result[0].message.contains("1") && result[0].message.contains("expected 3"));
621    }
622
623    #[test]
624    fn test_one_or_ordered_separate_lists() {
625        // Test OneOrOrdered with separate lists using different styles (should pass)
626        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
627
628        let content = "# First list\n\n1. Item A\n1. Item B\n\n# Second list\n\n1. Item X\n2. Item Y\n3. Item Z";
629        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
630        let result = rule.check(&ctx).unwrap();
631        assert!(
632            result.is_empty(),
633            "Separate lists can use different styles in OneOrOrdered mode"
634        );
635    }
636}