Skip to main content

rumdl_lib/rules/
md029_ordered_list_prefix.rs

1/// Rule MD029: Ordered list item prefix
2///
3/// See [docs/md029.md](../../docs/md029.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::regex_cache::ORDERED_LIST_MARKER_REGEX;
7use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
8use std::collections::HashMap;
9use toml;
10
11mod md029_config;
12pub use md029_config::{ListStyle, MD029Config};
13
14/// Type alias for grouped list items: (list_id, items) where items are (line_num, LineInfo, ListItemInfo)
15type ListItemGroup<'a> = (
16    usize,
17    Vec<(
18        usize,
19        &'a crate::lint_context::LineInfo,
20        &'a crate::lint_context::ListItemInfo,
21    )>,
22);
23
24#[derive(Debug, Clone, Default)]
25pub struct MD029OrderedListPrefix {
26    config: MD029Config,
27}
28
29impl MD029OrderedListPrefix {
30    pub fn new(style: ListStyle) -> Self {
31        Self {
32            config: MD029Config { style },
33        }
34    }
35
36    pub fn from_config_struct(config: MD029Config) -> Self {
37        Self { config }
38    }
39
40    #[inline]
41    fn parse_marker_number(marker: &str) -> Option<usize> {
42        // Handle marker format like "1." or "1"
43        let num_part = if let Some(stripped) = marker.strip_suffix('.') {
44            stripped
45        } else {
46            marker
47        };
48        num_part.parse::<usize>().ok()
49    }
50
51    /// Calculate the expected number for a list item.
52    /// The `start_value` is the CommonMark-provided start value for the list.
53    /// For style `Ordered`, items should be `start_value, start_value+1, start_value+2, ...`
54    #[inline]
55    fn get_expected_number(&self, index: usize, detected_style: Option<ListStyle>, start_value: u64) -> usize {
56        // Use detected_style when the configuration is auto-detect mode (OneOrOrdered or Consistent)
57        // For explicit style configurations, always use the configured style
58        let style = match self.config.style {
59            ListStyle::OneOrOrdered | ListStyle::Consistent => detected_style.unwrap_or(ListStyle::OneOne),
60            _ => self.config.style.clone(),
61        };
62
63        match style {
64            ListStyle::One | ListStyle::OneOne => 1,
65            ListStyle::Ordered => (start_value as usize) + index,
66            ListStyle::Ordered0 => index,
67            ListStyle::OneOrOrdered | ListStyle::Consistent => {
68                // This shouldn't be reached since we handle these above
69                1
70            }
71        }
72    }
73
74    /// Detect the style being used in a list by checking all items for prevalence.
75    /// The `start_value` parameter is the CommonMark-provided list start value.
76    fn detect_list_style(
77        items: &[(
78            usize,
79            &crate::lint_context::LineInfo,
80            &crate::lint_context::ListItemInfo,
81        )],
82        start_value: u64,
83    ) -> ListStyle {
84        if items.len() < 2 {
85            // With only one item, check if it matches the start value
86            // If so, treat as Ordered (respects CommonMark start value)
87            // Otherwise, check if it's 1 (OneOne style)
88            let first_num = Self::parse_marker_number(&items[0].2.marker);
89            if first_num == Some(start_value as usize) {
90                return ListStyle::Ordered;
91            }
92            return ListStyle::OneOne;
93        }
94
95        let first_num = Self::parse_marker_number(&items[0].2.marker);
96        let second_num = Self::parse_marker_number(&items[1].2.marker);
97
98        // Fast path: Check for Ordered0 special case (starts with 0, 1)
99        if matches!((first_num, second_num), (Some(0), Some(1))) {
100            return ListStyle::Ordered0;
101        }
102
103        // Fast path: If first 2 items aren't both "1", it must be Ordered (O(1))
104        // This handles ~95% of lists instantly: "1. 2. 3...", "2. 3. 4...", etc.
105        if first_num != Some(1) || second_num != Some(1) {
106            return ListStyle::Ordered;
107        }
108
109        // Slow path: Both first items are "1", check if ALL are "1" (O(n))
110        // This is necessary for lists like "1. 1. 1..." vs "1. 1. 2. 3..."
111        let all_ones = items
112            .iter()
113            .all(|(_, _, item)| Self::parse_marker_number(&item.marker) == Some(1));
114
115        if all_ones {
116            ListStyle::OneOne
117        } else {
118            ListStyle::Ordered
119        }
120    }
121
122    /// Build maps from line number to list ID and list ID to start value using pulldown-cmark's AST.
123    /// This is the authoritative source of truth for list membership and respects CommonMark's
124    /// list start values (e.g., a list that starts at 11 is valid if items are 11, 12, 13...).
125    fn build_commonmark_list_membership(
126        content: &str,
127    ) -> (
128        std::collections::HashMap<usize, usize>,
129        std::collections::HashMap<usize, u64>,
130    ) {
131        let mut line_to_list: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
132        let mut list_start_values: std::collections::HashMap<usize, u64> = std::collections::HashMap::new();
133
134        // Pre-compute line start offsets for byte-to-line conversion
135        let line_starts: Vec<usize> = std::iter::once(0)
136            .chain(content.match_indices('\n').map(|(i, _)| i + 1))
137            .collect();
138
139        let byte_to_line = |byte_offset: usize| -> usize {
140            line_starts
141                .iter()
142                .rposition(|&start| start <= byte_offset)
143                .map(|i| i + 1) // 1-indexed
144                .unwrap_or(1)
145        };
146
147        let options = Options::empty();
148        let parser = Parser::new_ext(content, options);
149
150        let mut list_stack: Vec<(usize, bool, u64)> = Vec::new(); // (list_id, is_ordered, start_value)
151        let mut next_list_id = 0;
152
153        for (event, range) in parser.into_offset_iter() {
154            match event {
155                Event::Start(Tag::List(start_num)) => {
156                    let is_ordered = start_num.is_some();
157                    let start_value = start_num.unwrap_or(1);
158                    list_stack.push((next_list_id, is_ordered, start_value));
159                    if is_ordered {
160                        list_start_values.insert(next_list_id, start_value);
161                    }
162                    next_list_id += 1;
163                }
164                Event::End(TagEnd::List(_)) => {
165                    list_stack.pop();
166                }
167                Event::Start(Tag::Item) => {
168                    // Record the line number of this item and its list ID
169                    if let Some(&(list_id, is_ordered, _)) = list_stack.last()
170                        && is_ordered
171                    {
172                        let line_num = byte_to_line(range.start);
173                        line_to_list.insert(line_num, list_id);
174                    }
175                }
176                _ => {}
177            }
178        }
179
180        (line_to_list, list_start_values)
181    }
182
183    /// Group ordered items by their CommonMark list membership.
184    /// Returns (list_id, items) tuples for each distinct list, where items are (line_num, LineInfo, ListItemInfo).
185    fn group_items_by_commonmark_list<'a>(
186        ctx: &'a crate::lint_context::LintContext,
187        line_to_list: &std::collections::HashMap<usize, usize>,
188    ) -> Vec<ListItemGroup<'a>> {
189        // Collect all ordered items with their list IDs
190        let mut items_with_list_id: Vec<(
191            usize,
192            usize,
193            &crate::lint_context::LineInfo,
194            &crate::lint_context::ListItemInfo,
195        )> = Vec::new();
196
197        for line_num in 1..=ctx.lines.len() {
198            if let Some(line_info) = ctx.line_info(line_num)
199                && let Some(list_item) = line_info.list_item.as_deref()
200                && list_item.is_ordered
201            {
202                // Get the list ID from pulldown-cmark's grouping
203                if let Some(&list_id) = line_to_list.get(&line_num) {
204                    items_with_list_id.push((list_id, line_num, line_info, list_item));
205                }
206            }
207        }
208
209        // Group by list_id
210        let mut groups: std::collections::HashMap<
211            usize,
212            Vec<(
213                usize,
214                &crate::lint_context::LineInfo,
215                &crate::lint_context::ListItemInfo,
216            )>,
217        > = std::collections::HashMap::new();
218
219        for (list_id, line_num, line_info, list_item) in items_with_list_id {
220            groups
221                .entry(list_id)
222                .or_default()
223                .push((line_num, line_info, list_item));
224        }
225
226        // Convert to Vec of (list_id, items), sort each group by line number, and sort groups by first line
227        let mut result: Vec<_> = groups.into_iter().collect();
228        for (_, items) in &mut result {
229            items.sort_by_key(|(line_num, _, _)| *line_num);
230        }
231        // Sort groups by their first item's line number for deterministic output
232        result.sort_by_key(|(_, items)| items.first().map(|(ln, _, _)| *ln).unwrap_or(0));
233
234        result
235    }
236
237    /// Check a CommonMark-grouped list for correct ordering.
238    /// Uses the CommonMark start value to validate items (e.g., a list starting at 11
239    /// expects items 11, 12, 13... - no violation there).
240    fn check_commonmark_list_group(
241        &self,
242        _ctx: &crate::lint_context::LintContext,
243        group: &[(
244            usize,
245            &crate::lint_context::LineInfo,
246            &crate::lint_context::ListItemInfo,
247        )],
248        warnings: &mut Vec<LintWarning>,
249        document_wide_style: Option<ListStyle>,
250        start_value: u64,
251    ) {
252        if group.is_empty() {
253            return;
254        }
255
256        // Group items by indentation level (marker_column) to handle nested lists
257        type LevelGroups<'a> = HashMap<
258            usize,
259            Vec<(
260                usize,
261                &'a crate::lint_context::LineInfo,
262                &'a crate::lint_context::ListItemInfo,
263            )>,
264        >;
265        let mut level_groups: LevelGroups = HashMap::new();
266
267        for (line_num, line_info, list_item) in group {
268            level_groups
269                .entry(list_item.marker_column)
270                .or_default()
271                .push((*line_num, *line_info, *list_item));
272        }
273
274        // Process each indentation level in sorted order for deterministic output
275        let mut sorted_levels: Vec<_> = level_groups.into_iter().collect();
276        sorted_levels.sort_by_key(|(indent, _)| *indent);
277
278        for (_indent, mut items) in sorted_levels {
279            // Sort by line number
280            items.sort_by_key(|(line_num, _, _)| *line_num);
281
282            if items.is_empty() {
283                continue;
284            }
285
286            // Determine style for this group
287            let detected_style = if let Some(doc_style) = document_wide_style.clone() {
288                Some(doc_style)
289            } else if self.config.style == ListStyle::OneOrOrdered {
290                Some(Self::detect_list_style(&items, start_value))
291            } else {
292                None
293            };
294
295            // Check each item using the CommonMark start value
296            for (idx, (line_num, line_info, list_item)) in items.iter().enumerate() {
297                if let Some(actual_num) = Self::parse_marker_number(&list_item.marker) {
298                    let expected_num = self.get_expected_number(idx, detected_style.clone(), start_value);
299
300                    if actual_num != expected_num {
301                        let marker_start = line_info.byte_offset + list_item.marker_column;
302                        let number_len = if let Some(dot_pos) = list_item.marker.find('.') {
303                            dot_pos
304                        } else if let Some(paren_pos) = list_item.marker.find(')') {
305                            paren_pos
306                        } else {
307                            list_item.marker.len()
308                        };
309
310                        let style_name = match detected_style.as_ref().unwrap_or(&ListStyle::Ordered) {
311                            ListStyle::OneOne => "one",
312                            ListStyle::Ordered => "ordered",
313                            ListStyle::Ordered0 => "ordered0",
314                            _ => "ordered",
315                        };
316
317                        let style_context = match self.config.style {
318                            ListStyle::Consistent => format!("document style '{style_name}'"),
319                            ListStyle::OneOrOrdered => format!("list style '{style_name}'"),
320                            ListStyle::One | ListStyle::OneOne => "configured style 'one'".to_string(),
321                            ListStyle::Ordered => "configured style 'ordered'".to_string(),
322                            ListStyle::Ordered0 => "configured style 'ordered0'".to_string(),
323                        };
324
325                        // Only provide auto-fix when:
326                        // 1. The list starts at 1 (default numbering), OR
327                        // 2. We're using explicit 'one' style (numbers are meaningless)
328                        // When start_value > 1, the user explicitly chose that number,
329                        // so auto-fixing would destroy their intent.
330                        let should_provide_fix =
331                            start_value == 1 || matches!(self.config.style, ListStyle::One | ListStyle::OneOne);
332
333                        warnings.push(LintWarning {
334                            rule_name: Some(self.name().to_string()),
335                            message: format!(
336                                "Ordered list item number {actual_num} does not match {style_context} (expected {expected_num})"
337                            ),
338                            line: *line_num,
339                            column: list_item.marker_column + 1,
340                            end_line: *line_num,
341                            end_column: list_item.marker_column + number_len + 1,
342                            severity: Severity::Warning,
343                            fix: if should_provide_fix {
344                                Some(Fix {
345                                    range: marker_start..marker_start + number_len,
346                                    replacement: expected_num.to_string(),
347                                })
348                            } else {
349                                None
350                            },
351                        });
352                    }
353                }
354            }
355        }
356    }
357}
358
359impl Rule for MD029OrderedListPrefix {
360    fn name(&self) -> &'static str {
361        "MD029"
362    }
363
364    fn description(&self) -> &'static str {
365        "Ordered list marker value"
366    }
367
368    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
369        // Early returns for performance
370        if ctx.content.is_empty() {
371            return Ok(Vec::new());
372        }
373
374        // Quick check for any ordered list markers before processing
375        if (!ctx.content.contains('.') && !ctx.content.contains(')'))
376            || !ctx.content.lines().any(|line| ORDERED_LIST_MARKER_REGEX.is_match(line))
377        {
378            return Ok(Vec::new());
379        }
380
381        let mut warnings = Vec::new();
382
383        // Use pulldown-cmark's AST for authoritative list membership and start values.
384        // This respects CommonMark's list start values (e.g., a list starting at 11
385        // expects items 11, 12, 13... - no violation there).
386        let (line_to_list, list_start_values) = Self::build_commonmark_list_membership(ctx.content);
387        let list_groups = Self::group_items_by_commonmark_list(ctx, &line_to_list);
388
389        if list_groups.is_empty() {
390            return Ok(Vec::new());
391        }
392
393        // For Consistent style, detect document-wide prevalent style
394        let document_wide_style = if self.config.style == ListStyle::Consistent {
395            // Collect ALL ordered items from ALL groups
396            let mut all_document_items = Vec::new();
397            for (_, items) in &list_groups {
398                for (line_num, line_info, list_item) in items {
399                    all_document_items.push((*line_num, *line_info, *list_item));
400                }
401            }
402            // Detect style across entire document (use 1 as default for pattern detection)
403            if !all_document_items.is_empty() {
404                Some(Self::detect_list_style(&all_document_items, 1))
405            } else {
406                None
407            }
408        } else {
409            None
410        };
411
412        // Process each CommonMark-defined list group with its start value
413        for (list_id, items) in list_groups {
414            let start_value = list_start_values.get(&list_id).copied().unwrap_or(1);
415            self.check_commonmark_list_group(ctx, &items, &mut warnings, document_wide_style.clone(), start_value);
416        }
417
418        // Sort warnings by line number for deterministic output
419        warnings.sort_by_key(|w| (w.line, w.column));
420
421        Ok(warnings)
422    }
423
424    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
425        // Use the same logic as check() - just apply the fixes from warnings
426        let warnings = self.check(ctx)?;
427
428        if warnings.is_empty() {
429            // No changes needed
430            return Ok(ctx.content.to_string());
431        }
432
433        // Collect fixes and sort by position
434        let mut fixes: Vec<&Fix> = Vec::new();
435        for warning in &warnings {
436            if let Some(ref fix) = warning.fix {
437                fixes.push(fix);
438            }
439        }
440        fixes.sort_by_key(|f| f.range.start);
441
442        let mut result = String::new();
443        let mut last_pos = 0;
444        let content_bytes = ctx.content.as_bytes();
445
446        for fix in fixes {
447            // Add content before the fix
448            if last_pos < fix.range.start {
449                let chunk = &content_bytes[last_pos..fix.range.start];
450                result.push_str(
451                    std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
452                );
453            }
454            // Add the replacement
455            result.push_str(&fix.replacement);
456            last_pos = fix.range.end;
457        }
458
459        // Add remaining content
460        if last_pos < content_bytes.len() {
461            let chunk = &content_bytes[last_pos..];
462            result.push_str(
463                std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
464            );
465        }
466
467        Ok(result)
468    }
469
470    /// Get the category of this rule for selective processing
471    fn category(&self) -> RuleCategory {
472        RuleCategory::List
473    }
474
475    /// Check if this rule should be skipped
476    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
477        ctx.content.is_empty() || !ctx.likely_has_lists()
478    }
479
480    fn as_any(&self) -> &dyn std::any::Any {
481        self
482    }
483
484    fn default_config_section(&self) -> Option<(String, toml::Value)> {
485        let default_config = MD029Config::default();
486        let json_value = serde_json::to_value(&default_config).ok()?;
487        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
488        if let toml::Value::Table(table) = toml_value {
489            if !table.is_empty() {
490                Some((MD029Config::RULE_NAME.to_string(), toml::Value::Table(table)))
491            } else {
492                None
493            }
494        } else {
495            None
496        }
497    }
498
499    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
500    where
501        Self: Sized,
502    {
503        let rule_config = crate::rule_config_serde::load_rule_config::<MD029Config>(config);
504        Box::new(MD029OrderedListPrefix::from_config_struct(rule_config))
505    }
506}
507
508#[cfg(test)]
509mod tests {
510    use super::*;
511
512    #[test]
513    fn test_basic_functionality() {
514        // Test with default style (ordered)
515        let rule = MD029OrderedListPrefix::default();
516
517        // Test with correctly ordered list
518        let content = "1. First item\n2. Second item\n3. Third item";
519        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
520        let result = rule.check(&ctx).unwrap();
521        assert!(result.is_empty());
522
523        // Test with incorrectly ordered list
524        let content = "1. First item\n3. Third item\n5. Fifth item";
525        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
526        let result = rule.check(&ctx).unwrap();
527        assert_eq!(result.len(), 2); // Should have warnings for items 3 and 5
528
529        // Test with one-one style
530        let rule = MD029OrderedListPrefix::new(ListStyle::OneOne);
531        let content = "1. First item\n2. Second item\n3. Third item";
532        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
533        let result = rule.check(&ctx).unwrap();
534        assert_eq!(result.len(), 2); // Should have warnings for items 2 and 3
535
536        // Test with ordered0 style
537        let rule = MD029OrderedListPrefix::new(ListStyle::Ordered0);
538        let content = "0. First item\n1. Second item\n2. Third item";
539        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
540        let result = rule.check(&ctx).unwrap();
541        assert!(result.is_empty());
542    }
543
544    #[test]
545    fn test_redundant_computation_fix() {
546        // This test confirms that the redundant computation bug is fixed
547        // Previously: get_list_number() was called twice (once for is_some(), once for unwrap())
548        // Now: get_list_number() is called once with if let pattern
549
550        let rule = MD029OrderedListPrefix::default();
551
552        // Test with mixed valid and edge case content
553        let content = "1. First item\n3. Wrong number\n2. Another wrong number";
554        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
555
556        // This should not panic and should produce warnings for incorrect numbering
557        let result = rule.check(&ctx).unwrap();
558        assert_eq!(result.len(), 2); // Should have warnings for items 3 and 2
559
560        // Verify the warnings have correct content
561        assert!(result[0].message.contains("3") && result[0].message.contains("expected 2"));
562        assert!(result[1].message.contains("2") && result[1].message.contains("expected 3"));
563    }
564
565    #[test]
566    fn test_performance_improvement() {
567        // This test verifies the rule handles large lists without performance issues
568        let rule = MD029OrderedListPrefix::default();
569
570        // Create a larger list with WRONG numbers: 1, 5, 10, 15, ...
571        // Starting at 1, CommonMark expects 1, 2, 3, 4, ...
572        // So items 2-100 are all wrong (expected 2, got 5; expected 3, got 10; etc.)
573        let mut content = String::from("1. Item 1\n"); // First item correct
574        for i in 2..=100 {
575            content.push_str(&format!("{}. Item {}\n", i * 5 - 5, i)); // Wrong numbers
576        }
577
578        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
579
580        // This should complete without issues and produce warnings for items 2-100
581        let result = rule.check(&ctx).unwrap();
582        assert_eq!(result.len(), 99, "Should have warnings for items 2-100 (99 items)");
583
584        // First wrong item: "5. Item 2" (expected 2)
585        assert!(result[0].message.contains("5") && result[0].message.contains("expected 2"));
586    }
587
588    #[test]
589    fn test_one_or_ordered_with_all_ones() {
590        // Test OneOrOrdered style with all 1s (should pass)
591        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
592
593        let content = "1. First item\n1. Second item\n1. Third item";
594        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
595        let result = rule.check(&ctx).unwrap();
596        assert!(result.is_empty(), "All ones should be valid in OneOrOrdered mode");
597    }
598
599    #[test]
600    fn test_one_or_ordered_with_sequential() {
601        // Test OneOrOrdered style with sequential numbering (should pass)
602        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
603
604        let content = "1. First item\n2. Second item\n3. Third item";
605        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
606        let result = rule.check(&ctx).unwrap();
607        assert!(
608            result.is_empty(),
609            "Sequential numbering should be valid in OneOrOrdered mode"
610        );
611    }
612
613    #[test]
614    fn test_one_or_ordered_with_mixed_style() {
615        // Test OneOrOrdered style with mixed numbering (should fail)
616        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
617
618        let content = "1. First item\n2. Second item\n1. Third item";
619        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
620        let result = rule.check(&ctx).unwrap();
621        assert_eq!(result.len(), 1, "Mixed style should produce one warning");
622        assert!(result[0].message.contains("1") && result[0].message.contains("expected 3"));
623    }
624
625    #[test]
626    fn test_one_or_ordered_separate_lists() {
627        // Test OneOrOrdered with separate lists using different styles (should pass)
628        let rule = MD029OrderedListPrefix::new(ListStyle::OneOrOrdered);
629
630        let content = "# First list\n\n1. Item A\n1. Item B\n\n# Second list\n\n1. Item X\n2. Item Y\n3. Item Z";
631        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
632        let result = rule.check(&ctx).unwrap();
633        assert!(
634            result.is_empty(),
635            "Separate lists can use different styles in OneOrOrdered mode"
636        );
637    }
638}