rumdl_lib/rules/
md029_ordered_list_prefix.rs

1/// Rule MD029: Ordered list item prefix
2///
3/// See [docs/md029.md](../../docs/md029.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::regex_cache::ORDERED_LIST_MARKER_REGEX;
7use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
8use toml;
9
10mod md029_config;
11pub use md029_config::{ListStyle, MD029Config};
12
13#[derive(Debug, Clone, Default)]
14pub struct MD029OrderedListPrefix {
15    config: MD029Config,
16}
17
18impl MD029OrderedListPrefix {
19    pub fn new(style: ListStyle) -> Self {
20        Self {
21            config: MD029Config { style },
22        }
23    }
24
25    pub fn from_config_struct(config: MD029Config) -> Self {
26        Self { config }
27    }
28
29    #[inline]
30    fn parse_marker_number(marker: &str) -> Option<usize> {
31        // Handle marker format like "1." or "1"
32        let num_part = if let Some(stripped) = marker.strip_suffix('.') {
33            stripped
34        } else {
35            marker
36        };
37        num_part.parse::<usize>().ok()
38    }
39
40    #[inline]
41    fn get_expected_number(&self, index: usize, detected_style: Option<ListStyle>) -> usize {
42        // Use detected_style when the configuration is auto-detect mode (OneOrOrdered or Consistent)
43        // For explicit style configurations, always use the configured style
44        let style = match self.config.style {
45            ListStyle::OneOrOrdered | ListStyle::Consistent => detected_style.unwrap_or(ListStyle::OneOne),
46            _ => self.config.style.clone(),
47        };
48
49        match style {
50            ListStyle::One | ListStyle::OneOne => 1,
51            ListStyle::Ordered => index + 1,
52            ListStyle::Ordered0 => index,
53            ListStyle::OneOrOrdered | ListStyle::Consistent => {
54                // This shouldn't be reached since we handle these above
55                1
56            }
57        }
58    }
59
60    /// Detect the style being used in a list by checking all items for prevalence
61    fn detect_list_style(
62        items: &[(
63            usize,
64            &crate::lint_context::LineInfo,
65            &crate::lint_context::ListItemInfo,
66        )],
67    ) -> ListStyle {
68        if items.len() < 2 {
69            // With only one item, we can't determine the style, default to OneOne
70            return ListStyle::OneOne;
71        }
72
73        let first_num = Self::parse_marker_number(&items[0].2.marker);
74        let second_num = Self::parse_marker_number(&items[1].2.marker);
75
76        // Fast path: Check for Ordered0 special case (starts with 0, 1)
77        if matches!((first_num, second_num), (Some(0), Some(1))) {
78            return ListStyle::Ordered0;
79        }
80
81        // Fast path: If first 2 items aren't both "1", it must be Ordered (O(1))
82        // This handles ~95% of lists instantly: "1. 2. 3...", "2. 3. 4...", etc.
83        if first_num != Some(1) || second_num != Some(1) {
84            return ListStyle::Ordered;
85        }
86
87        // Slow path: Both first items are "1", check if ALL are "1" (O(n))
88        // This is necessary for lists like "1. 1. 1..." vs "1. 1. 2. 3..."
89        let all_ones = items
90            .iter()
91            .all(|(_, _, item)| Self::parse_marker_number(&item.marker) == Some(1));
92
93        if all_ones {
94            ListStyle::OneOne
95        } else {
96            ListStyle::Ordered
97        }
98    }
99
100    /// Build a map from line number to list ID using pulldown-cmark's AST.
101    /// This is the authoritative source of truth for list membership.
102    fn build_commonmark_list_membership(content: &str) -> std::collections::HashMap<usize, usize> {
103        let mut line_to_list: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
104
105        // Pre-compute line start offsets for byte-to-line conversion
106        let line_starts: Vec<usize> = std::iter::once(0)
107            .chain(content.match_indices('\n').map(|(i, _)| i + 1))
108            .collect();
109
110        let byte_to_line = |byte_offset: usize| -> usize {
111            line_starts
112                .iter()
113                .rposition(|&start| start <= byte_offset)
114                .map(|i| i + 1) // 1-indexed
115                .unwrap_or(1)
116        };
117
118        let options = Options::empty();
119        let parser = Parser::new_ext(content, options);
120
121        let mut list_stack: Vec<(usize, bool)> = Vec::new(); // (list_id, is_ordered)
122        let mut next_list_id = 0;
123
124        for (event, range) in parser.into_offset_iter() {
125            match event {
126                Event::Start(Tag::List(start_num)) => {
127                    let is_ordered = start_num.is_some();
128                    list_stack.push((next_list_id, is_ordered));
129                    next_list_id += 1;
130                }
131                Event::End(TagEnd::List(_)) => {
132                    list_stack.pop();
133                }
134                Event::Start(Tag::Item) => {
135                    // Record the line number of this item and its list ID
136                    if let Some(&(list_id, is_ordered)) = list_stack.last()
137                        && is_ordered
138                    {
139                        let line_num = byte_to_line(range.start);
140                        line_to_list.insert(line_num, list_id);
141                    }
142                }
143                _ => {}
144            }
145        }
146
147        line_to_list
148    }
149
150    /// Group ordered items by their CommonMark list membership.
151    /// Returns groups of (line_num, LineInfo, ListItemInfo) for each distinct list.
152    fn group_items_by_commonmark_list<'a>(
153        ctx: &'a crate::lint_context::LintContext,
154        line_to_list: &std::collections::HashMap<usize, usize>,
155    ) -> Vec<
156        Vec<(
157            usize,
158            &'a crate::lint_context::LineInfo,
159            &'a crate::lint_context::ListItemInfo,
160        )>,
161    > {
162        // Collect all ordered items with their list IDs
163        let mut items_with_list_id: Vec<(
164            usize,
165            usize,
166            &crate::lint_context::LineInfo,
167            &crate::lint_context::ListItemInfo,
168        )> = Vec::new();
169
170        for line_num in 1..=ctx.lines.len() {
171            if let Some(line_info) = ctx.line_info(line_num)
172                && let Some(list_item) = &line_info.list_item
173                && list_item.is_ordered
174            {
175                // Get the list ID from pulldown-cmark's grouping
176                if let Some(&list_id) = line_to_list.get(&line_num) {
177                    items_with_list_id.push((list_id, line_num, line_info, list_item));
178                }
179            }
180        }
181
182        // Group by list_id
183        let mut groups: std::collections::HashMap<
184            usize,
185            Vec<(
186                usize,
187                &crate::lint_context::LineInfo,
188                &crate::lint_context::ListItemInfo,
189            )>,
190        > = std::collections::HashMap::new();
191
192        for (list_id, line_num, line_info, list_item) in items_with_list_id {
193            groups
194                .entry(list_id)
195                .or_default()
196                .push((line_num, line_info, list_item));
197        }
198
199        // Convert to Vec, sort each group by line number, and sort groups by first line
200        let mut result: Vec<_> = groups.into_values().collect();
201        for group in &mut result {
202            group.sort_by_key(|(line_num, _, _)| *line_num);
203        }
204        // Sort groups by their first item's line number for deterministic output
205        result.sort_by_key(|group| group.first().map(|(ln, _, _)| *ln).unwrap_or(0));
206
207        result
208    }
209
210    /// Check a CommonMark-grouped list for correct ordering
211    fn check_commonmark_list_group(
212        &self,
213        _ctx: &crate::lint_context::LintContext,
214        group: &[(
215            usize,
216            &crate::lint_context::LineInfo,
217            &crate::lint_context::ListItemInfo,
218        )],
219        warnings: &mut Vec<LintWarning>,
220        document_wide_style: Option<ListStyle>,
221    ) {
222        if group.is_empty() {
223            return;
224        }
225
226        // Group items by indentation level (marker_column) to handle nested lists
227        type LevelGroups<'a> = std::collections::HashMap<
228            usize,
229            Vec<(
230                usize,
231                &'a crate::lint_context::LineInfo,
232                &'a crate::lint_context::ListItemInfo,
233            )>,
234        >;
235        let mut level_groups: LevelGroups = std::collections::HashMap::new();
236
237        for (line_num, line_info, list_item) in group {
238            level_groups
239                .entry(list_item.marker_column)
240                .or_default()
241                .push((*line_num, *line_info, *list_item));
242        }
243
244        // Process each indentation level in sorted order for deterministic output
245        let mut sorted_levels: Vec<_> = level_groups.into_iter().collect();
246        sorted_levels.sort_by_key(|(indent, _)| *indent);
247
248        for (_indent, mut items) in sorted_levels {
249            // Sort by line number
250            items.sort_by_key(|(line_num, _, _)| *line_num);
251
252            // Determine style for this group
253            let detected_style = if let Some(doc_style) = document_wide_style.clone() {
254                Some(doc_style)
255            } else if self.config.style == ListStyle::OneOrOrdered {
256                Some(Self::detect_list_style(&items))
257            } else {
258                None
259            };
260
261            // Check each item
262            for (idx, (line_num, line_info, list_item)) in items.iter().enumerate() {
263                if let Some(actual_num) = Self::parse_marker_number(&list_item.marker) {
264                    let expected_num = self.get_expected_number(idx, detected_style.clone());
265
266                    if actual_num != expected_num {
267                        let marker_start = line_info.byte_offset + list_item.marker_column;
268                        let number_len = if let Some(dot_pos) = list_item.marker.find('.') {
269                            dot_pos
270                        } else if let Some(paren_pos) = list_item.marker.find(')') {
271                            paren_pos
272                        } else {
273                            list_item.marker.len()
274                        };
275
276                        let style_name = match detected_style.as_ref().unwrap_or(&ListStyle::Ordered) {
277                            ListStyle::OneOne => "one",
278                            ListStyle::Ordered => "ordered",
279                            ListStyle::Ordered0 => "ordered0",
280                            _ => "ordered",
281                        };
282
283                        let style_context = match self.config.style {
284                            ListStyle::Consistent => format!("document style '{style_name}'"),
285                            ListStyle::OneOrOrdered => format!("list style '{style_name}'"),
286                            ListStyle::One | ListStyle::OneOne => "configured style 'one'".to_string(),
287                            ListStyle::Ordered => "configured style 'ordered'".to_string(),
288                            ListStyle::Ordered0 => "configured style 'ordered0'".to_string(),
289                        };
290
291                        warnings.push(LintWarning {
292                            rule_name: Some(self.name().to_string()),
293                            message: format!(
294                                "Ordered list item number {actual_num} does not match {style_context} (expected {expected_num})"
295                            ),
296                            line: *line_num,
297                            column: list_item.marker_column + 1,
298                            end_line: *line_num,
299                            end_column: list_item.marker_column + number_len + 1,
300                            severity: Severity::Warning,
301                            fix: Some(Fix {
302                                range: marker_start..marker_start + number_len,
303                                replacement: expected_num.to_string(),
304                            }),
305                        });
306                    }
307                }
308            }
309        }
310    }
311}
312
313impl Rule for MD029OrderedListPrefix {
314    fn name(&self) -> &'static str {
315        "MD029"
316    }
317
318    fn description(&self) -> &'static str {
319        "Ordered list marker value"
320    }
321
322    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
323        // Early returns for performance
324        if ctx.content.is_empty() {
325            return Ok(Vec::new());
326        }
327
328        // Quick check for any ordered list markers before processing
329        if !ctx.content.contains('.') || !ctx.content.lines().any(|line| ORDERED_LIST_MARKER_REGEX.is_match(line)) {
330            return Ok(Vec::new());
331        }
332
333        let mut warnings = Vec::new();
334
335        // Use pulldown-cmark's AST for authoritative list membership.
336        // This fixes issues where heuristic-based grouping incorrectly splits lists.
337        let line_to_list = Self::build_commonmark_list_membership(ctx.content);
338        let list_groups = Self::group_items_by_commonmark_list(ctx, &line_to_list);
339
340        if list_groups.is_empty() {
341            return Ok(Vec::new());
342        }
343
344        // For Consistent style, detect document-wide prevalent style
345        let document_wide_style = if self.config.style == ListStyle::Consistent {
346            // Collect ALL ordered items from ALL groups
347            let mut all_document_items = Vec::new();
348            for group in &list_groups {
349                for (line_num, line_info, list_item) in group {
350                    all_document_items.push((*line_num, *line_info, *list_item));
351                }
352            }
353            // Detect style across entire document
354            if !all_document_items.is_empty() {
355                Some(Self::detect_list_style(&all_document_items))
356            } else {
357                None
358            }
359        } else {
360            None
361        };
362
363        // Process each CommonMark-defined list group
364        for group in list_groups {
365            self.check_commonmark_list_group(ctx, &group, &mut warnings, document_wide_style.clone());
366        }
367
368        // Sort warnings by line number for deterministic output
369        warnings.sort_by_key(|w| (w.line, w.column));
370
371        Ok(warnings)
372    }
373
374    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
375        // Use the same logic as check() - just apply the fixes from warnings
376        let warnings = self.check(ctx)?;
377
378        if warnings.is_empty() {
379            // No changes needed
380            return Ok(ctx.content.to_string());
381        }
382
383        // Collect fixes and sort by position
384        let mut fixes: Vec<&Fix> = Vec::new();
385        for warning in &warnings {
386            if let Some(ref fix) = warning.fix {
387                fixes.push(fix);
388            }
389        }
390        fixes.sort_by_key(|f| f.range.start);
391
392        let mut result = String::new();
393        let mut last_pos = 0;
394        let content_bytes = ctx.content.as_bytes();
395
396        for fix in fixes {
397            // Add content before the fix
398            if last_pos < fix.range.start {
399                let chunk = &content_bytes[last_pos..fix.range.start];
400                result.push_str(
401                    std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
402                );
403            }
404            // Add the replacement
405            result.push_str(&fix.replacement);
406            last_pos = fix.range.end;
407        }
408
409        // Add remaining content
410        if last_pos < content_bytes.len() {
411            let chunk = &content_bytes[last_pos..];
412            result.push_str(
413                std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
414            );
415        }
416
417        Ok(result)
418    }
419
420    /// Get the category of this rule for selective processing
421    fn category(&self) -> RuleCategory {
422        RuleCategory::List
423    }
424
425    /// Check if this rule should be skipped
426    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
427        ctx.content.is_empty() || !ctx.likely_has_lists()
428    }
429
430    fn as_any(&self) -> &dyn std::any::Any {
431        self
432    }
433
434    fn default_config_section(&self) -> Option<(String, toml::Value)> {
435        let default_config = MD029Config::default();
436        let json_value = serde_json::to_value(&default_config).ok()?;
437        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
438        if let toml::Value::Table(table) = toml_value {
439            if !table.is_empty() {
440                Some((MD029Config::RULE_NAME.to_string(), toml::Value::Table(table)))
441            } else {
442                None
443            }
444        } else {
445            None
446        }
447    }
448
449    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
450    where
451        Self: Sized,
452    {
453        let rule_config = crate::rule_config_serde::load_rule_config::<MD029Config>(config);
454        Box::new(MD029OrderedListPrefix::from_config_struct(rule_config))
455    }
456}
457
458#[cfg(test)]
459mod tests {
460    use super::*;
461
462    #[test]
463    fn test_basic_functionality() {
464        // Test with default style (ordered)
465        let rule = MD029OrderedListPrefix::default();
466
467        // Test with correctly ordered list
468        let content = "1. First item\n2. Second item\n3. Third item";
469        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
470        let result = rule.check(&ctx).unwrap();
471        assert!(result.is_empty());
472
473        // Test with incorrectly ordered list
474        let content = "1. First item\n3. Third item\n5. Fifth item";
475        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
476        let result = rule.check(&ctx).unwrap();
477        assert_eq!(result.len(), 2); // Should have warnings for items 3 and 5
478
479        // Test with one-one style
480        let rule = MD029OrderedListPrefix::new(ListStyle::OneOne);
481        let content = "1. First item\n2. Second item\n3. Third item";
482        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
483        let result = rule.check(&ctx).unwrap();
484        assert_eq!(result.len(), 2); // Should have warnings for items 2 and 3
485
486        // Test with ordered0 style
487        let rule = MD029OrderedListPrefix::new(ListStyle::Ordered0);
488        let content = "0. First item\n1. Second item\n2. Third item";
489        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
490        let result = rule.check(&ctx).unwrap();
491        assert!(result.is_empty());
492    }
493
494    #[test]
495    fn test_redundant_computation_fix() {
496        // This test confirms that the redundant computation bug is fixed
497        // Previously: get_list_number() was called twice (once for is_some(), once for unwrap())
498        // Now: get_list_number() is called once with if let pattern
499
500        let rule = MD029OrderedListPrefix::default();
501
502        // Test with mixed valid and edge case content
503        let content = "1. First item\n3. Wrong number\n2. Another wrong number";
504        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
505
506        // This should not panic and should produce warnings for incorrect numbering
507        let result = rule.check(&ctx).unwrap();
508        assert_eq!(result.len(), 2); // Should have warnings for items 3 and 2
509
510        // Verify the warnings have correct content
511        assert!(result[0].message.contains("3") && result[0].message.contains("expected 2"));
512        assert!(result[1].message.contains("2") && result[1].message.contains("expected 3"));
513    }
514
515    #[test]
516    fn test_performance_improvement() {
517        // This test verifies that the fix improves performance by avoiding redundant calls
518        let rule = MD029OrderedListPrefix::default();
519
520        // Create a larger list to test performance
521        let mut content = String::new();
522        for i in 1..=100 {
523            content.push_str(&format!("{}. Item {}\n", i + 1, i)); // All wrong numbers
524        }
525
526        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
527
528        // This should complete without issues and produce warnings for all items
529        let result = rule.check(&ctx).unwrap();
530        assert_eq!(result.len(), 100); // Should have warnings for all 100 items
531
532        // Verify first and last warnings
533        assert!(result[0].message.contains("2") && result[0].message.contains("expected 1"));
534        assert!(result[99].message.contains("101") && result[99].message.contains("expected 100"));
535    }
536
537    #[test]
538    fn test_one_or_ordered_with_all_ones() {
539        // Test OneOrOrdered style with all 1s (should pass)
540        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
541
542        let content = "1. First item\n1. Second item\n1. Third item";
543        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
544        let result = rule.check(&ctx).unwrap();
545        assert!(result.is_empty(), "All ones should be valid in OneOrOrdered mode");
546    }
547
548    #[test]
549    fn test_one_or_ordered_with_sequential() {
550        // Test OneOrOrdered style with sequential numbering (should pass)
551        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
552
553        let content = "1. First item\n2. Second item\n3. Third item";
554        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
555        let result = rule.check(&ctx).unwrap();
556        assert!(
557            result.is_empty(),
558            "Sequential numbering should be valid in OneOrOrdered mode"
559        );
560    }
561
562    #[test]
563    fn test_one_or_ordered_with_mixed_style() {
564        // Test OneOrOrdered style with mixed numbering (should fail)
565        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
566
567        let content = "1. First item\n2. Second item\n1. Third item";
568        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
569        let result = rule.check(&ctx).unwrap();
570        assert_eq!(result.len(), 1, "Mixed style should produce one warning");
571        assert!(result[0].message.contains("1") && result[0].message.contains("expected 3"));
572    }
573
574    #[test]
575    fn test_one_or_ordered_separate_lists() {
576        // Test OneOrOrdered with separate lists using different styles (should pass)
577        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
578
579        let content = "# First list\n\n1. Item A\n1. Item B\n\n# Second list\n\n1. Item X\n2. Item Y\n3. Item Z";
580        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
581        let result = rule.check(&ctx).unwrap();
582        assert!(
583            result.is_empty(),
584            "Separate lists can use different styles in OneOrOrdered mode"
585        );
586    }
587}