rumdl_lib/rules/
md029_ordered_list_prefix.rs

1/// Rule MD029: Ordered list item prefix
2///
3/// See [docs/md029.md](../../docs/md029.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
7use crate::utils::regex_cache::ORDERED_LIST_MARKER_REGEX;
8use toml;
9
10mod md029_config;
11pub use md029_config::{ListStyle, MD029Config};
12
13#[derive(Debug, Clone, Default)]
14pub struct MD029OrderedListPrefix {
15    config: MD029Config,
16}
17
18impl MD029OrderedListPrefix {
19    pub fn new(style: ListStyle) -> Self {
20        Self {
21            config: MD029Config { style },
22        }
23    }
24
25    pub fn from_config_struct(config: MD029Config) -> Self {
26        Self { config }
27    }
28
29    #[inline]
30    fn parse_marker_number(marker: &str) -> Option<usize> {
31        // Handle marker format like "1." or "1"
32        let num_part = if let Some(stripped) = marker.strip_suffix('.') {
33            stripped
34        } else {
35            marker
36        };
37        num_part.parse::<usize>().ok()
38    }
39
40    #[inline]
41    fn get_expected_number(&self, index: usize) -> usize {
42        match self.config.style {
43            ListStyle::One => 1,
44            ListStyle::OneOne => 1,
45            ListStyle::Ordered => index + 1,
46            ListStyle::Ordered0 => index,
47        }
48    }
49}
50
51impl Rule for MD029OrderedListPrefix {
52    fn name(&self) -> &'static str {
53        "MD029"
54    }
55
56    fn description(&self) -> &'static str {
57        "Ordered list marker value"
58    }
59
60    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
61        // Early returns for performance
62        if ctx.content.is_empty() {
63            return Ok(Vec::new());
64        }
65
66        // Quick check for any ordered list markers before processing
67        if !ctx.content.contains('.') || !ctx.content.lines().any(|line| ORDERED_LIST_MARKER_REGEX.is_match(line)) {
68            return Ok(Vec::new());
69        }
70
71        let mut warnings = Vec::new();
72
73        // Collect all list blocks that contain ordered items (not just purely ordered blocks)
74        // This handles mixed lists where ordered items are nested within unordered lists
75        let blocks_with_ordered: Vec<_> = ctx
76            .list_blocks
77            .iter()
78            .filter(|block| {
79                // Check if this block contains any ordered items
80                block.item_lines.iter().any(|&line| {
81                    ctx.line_info(line)
82                        .and_then(|info| info.list_item.as_ref())
83                        .map(|item| item.is_ordered)
84                        .unwrap_or(false)
85                })
86            })
87            .collect();
88
89        if blocks_with_ordered.is_empty() {
90            return Ok(Vec::new());
91        }
92
93        // Group consecutive list blocks that should be treated as continuous
94        let mut block_groups = Vec::new();
95        let mut current_group = vec![blocks_with_ordered[0]];
96
97        for i in 1..blocks_with_ordered.len() {
98            let prev_block = blocks_with_ordered[i - 1];
99            let current_block = blocks_with_ordered[i];
100
101            // This catches the pattern: 1. item / - sub / 1. item (should be 2.)
102            let has_only_unindented_lists =
103                self.has_only_unindented_lists_between(ctx, prev_block.end_line, current_block.start_line);
104
105            // Be more conservative: only group if there are no structural separators
106            // Check specifically for headings between the blocks
107            let has_heading_between =
108                self.has_heading_between_blocks(ctx, prev_block.end_line, current_block.start_line);
109
110            // Check if there are only code blocks/fences between these list blocks
111            let between_content_is_code_only =
112                self.is_only_code_between_blocks(ctx, prev_block.end_line, current_block.start_line);
113
114            // Group blocks if:
115            // 1. They have only code between them, OR
116            // 2. They have only unindented list items between them (the new case!)
117            let should_group = (between_content_is_code_only || has_only_unindented_lists)
118                && self.blocks_are_logically_continuous(ctx, prev_block.end_line, current_block.start_line)
119                && !has_heading_between;
120
121            if should_group {
122                // Treat as continuation of the same logical list
123                current_group.push(current_block);
124            } else {
125                // Start a new list group
126                block_groups.push(current_group);
127                current_group = vec![current_block];
128            }
129        }
130        block_groups.push(current_group);
131
132        // Process each group of blocks as a continuous list
133        for group in block_groups {
134            self.check_ordered_list_group(ctx, &group, &mut warnings);
135        }
136
137        Ok(warnings)
138    }
139
140    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
141        // Use the same logic as check() - just apply the fixes from warnings
142        let warnings = self.check(ctx)?;
143
144        if warnings.is_empty() {
145            // No changes needed
146            return Ok(ctx.content.to_string());
147        }
148
149        // Collect fixes and sort by position
150        // Only apply MD029 fixes (numbering), not MD029-style fixes (indentation)
151        let mut fixes: Vec<&Fix> = Vec::new();
152        for warning in &warnings {
153            // Skip MD029-style warnings (lazy continuation indentation)
154            if warning.rule_name == Some("MD029-style") {
155                continue;
156            }
157            if let Some(ref fix) = warning.fix {
158                fixes.push(fix);
159            }
160        }
161        fixes.sort_by_key(|f| f.range.start);
162
163        let mut result = String::new();
164        let mut last_pos = 0;
165        let content_bytes = ctx.content.as_bytes();
166
167        for fix in fixes {
168            // Add content before the fix
169            if last_pos < fix.range.start {
170                let chunk = &content_bytes[last_pos..fix.range.start];
171                result.push_str(
172                    std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
173                );
174            }
175            // Add the replacement
176            result.push_str(&fix.replacement);
177            last_pos = fix.range.end;
178        }
179
180        // Add remaining content
181        if last_pos < content_bytes.len() {
182            let chunk = &content_bytes[last_pos..];
183            result.push_str(
184                std::str::from_utf8(chunk).map_err(|_| LintError::InvalidInput("Invalid UTF-8".to_string()))?,
185            );
186        }
187
188        Ok(result)
189    }
190
191    /// Optimized check using document structure
192    fn check_with_structure(
193        &self,
194        ctx: &crate::lint_context::LintContext,
195        _structure: &crate::utils::document_structure::DocumentStructure,
196    ) -> LintResult {
197        // For MD029, we need to use the regular check method to get lazy continuation detection
198        // The document structure optimization doesn't provide enough context for proper lazy continuation checking
199        self.check(ctx)
200    }
201
202    /// Get the category of this rule for selective processing
203    fn category(&self) -> RuleCategory {
204        RuleCategory::List
205    }
206
207    /// Check if this rule should be skipped
208    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
209        let content = ctx.content;
210        content.is_empty()
211            || !content.contains('1')
212            || (!content.contains("1.") && !content.contains("2.") && !content.contains("0."))
213    }
214
215    fn as_any(&self) -> &dyn std::any::Any {
216        self
217    }
218
219    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
220        Some(self)
221    }
222
223    fn default_config_section(&self) -> Option<(String, toml::Value)> {
224        let default_config = MD029Config::default();
225        let json_value = serde_json::to_value(&default_config).ok()?;
226        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
227        if let toml::Value::Table(table) = toml_value {
228            if !table.is_empty() {
229                Some((MD029Config::RULE_NAME.to_string(), toml::Value::Table(table)))
230            } else {
231                None
232            }
233        } else {
234            None
235        }
236    }
237
238    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
239    where
240        Self: Sized,
241    {
242        let rule_config = crate::rule_config_serde::load_rule_config::<MD029Config>(config);
243        Box::new(MD029OrderedListPrefix::from_config_struct(rule_config))
244    }
245}
246
247impl DocumentStructureExtensions for MD029OrderedListPrefix {
248    fn has_relevant_elements(
249        &self,
250        ctx: &crate::lint_context::LintContext,
251        _doc_structure: &DocumentStructure,
252    ) -> bool {
253        // This rule is relevant if there are any ordered list items
254        // We need to check even lists with all "1." items for:
255        // 1. Incorrect numbering according to configured style
256        // 2. Lazy continuation issues
257        ctx.list_blocks.iter().any(|block| block.is_ordered)
258    }
259}
260
261impl MD029OrderedListPrefix {
262    /// Check for lazy continuation lines in a list block
263    fn check_for_lazy_continuation(
264        &self,
265        ctx: &crate::lint_context::LintContext,
266        list_block: &crate::lint_context::ListBlock,
267        warnings: &mut Vec<LintWarning>,
268    ) {
269        // Check all lines in the block for lazy continuation
270        for line_num in list_block.start_line..=list_block.end_line {
271            if let Some(line_info) = ctx.line_info(line_num) {
272                // Skip list item lines themselves
273                if list_block.item_lines.contains(&line_num) {
274                    continue;
275                }
276
277                // Skip blank lines
278                if line_info.is_blank {
279                    continue;
280                }
281
282                // Skip lines that are in code blocks
283                if line_info.in_code_block {
284                    continue;
285                }
286
287                // Skip code fence lines
288                let trimmed = line_info.content.trim();
289                if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
290                    continue;
291                }
292
293                // Skip headings - they should never be treated as lazy continuation
294                if line_info.heading.is_some() {
295                    continue;
296                }
297
298                // Check if this is a lazy continuation (0-2 spaces)
299                if line_info.indent <= 2 && !line_info.content.trim().is_empty() {
300                    // This is a lazy continuation - add a style warning
301                    let col = line_info.indent + 1;
302
303                    warnings.push(LintWarning {
304                        rule_name: Some("MD029-style"),
305                        message: "List continuation should be indented (lazy continuation detected)".to_string(),
306                        line: line_num,
307                        column: col,
308                        end_line: line_num,
309                        end_column: col,
310                        severity: Severity::Warning,
311                        fix: Some(Fix {
312                            range: line_info.byte_offset..line_info.byte_offset,
313                            replacement: "   ".to_string(), // Add 3 spaces
314                        }),
315                    });
316                }
317            }
318        }
319    }
320
321    /// Check if blocks are separated only by unindented list items
322    /// This helps detect the pattern: 1. item / - sub / 1. item (should be 2.)
323    fn has_only_unindented_lists_between(
324        &self,
325        ctx: &crate::lint_context::LintContext,
326        end_line: usize,
327        start_line: usize,
328    ) -> bool {
329        if end_line >= start_line {
330            return false;
331        }
332
333        for line_num in (end_line + 1)..start_line {
334            if let Some(line_info) = ctx.line_info(line_num) {
335                let trimmed = line_info.content.trim();
336
337                // Skip empty lines
338                if trimmed.is_empty() {
339                    continue;
340                }
341
342                // If it's an unindented list item (column 0), that's what we're looking for
343                if line_info.list_item.is_some() && line_info.indent == 0 {
344                    continue;
345                }
346
347                // Any other non-empty content means it's not just unindented lists
348                return false;
349            }
350        }
351
352        true
353    }
354
355    /// Check if two list blocks are logically continuous (no major structural separators)
356    fn blocks_are_logically_continuous(
357        &self,
358        ctx: &crate::lint_context::LintContext,
359        end_line: usize,
360        start_line: usize,
361    ) -> bool {
362        if end_line >= start_line {
363            return false;
364        }
365
366        for line_num in (end_line + 1)..start_line {
367            if let Some(line_info) = ctx.line_info(line_num) {
368                // Skip empty lines
369                if line_info.is_blank {
370                    continue;
371                }
372
373                // Skip lines in code blocks
374                if line_info.in_code_block {
375                    continue;
376                }
377
378                // If there's any heading, the lists are not continuous
379                if line_info.heading.is_some() {
380                    return false;
381                }
382
383                // If there's any other non-empty content, be conservative and separate
384                let trimmed = line_info.content.trim();
385                if !trimmed.is_empty() && !trimmed.starts_with("```") && !trimmed.starts_with("~~~") {
386                    return false;
387                }
388            }
389        }
390
391        true
392    }
393
394    fn is_only_code_between_blocks(
395        &self,
396        ctx: &crate::lint_context::LintContext,
397        end_line: usize,
398        start_line: usize,
399    ) -> bool {
400        if end_line >= start_line {
401            return false;
402        }
403
404        // Calculate minimum continuation indent from the previous block's last item
405        let min_continuation_indent =
406            if let Some(prev_block) = ctx.list_blocks.iter().find(|block| block.end_line == end_line) {
407                // Get the last list item from the previous block
408                if let Some(&last_item_line) = prev_block.item_lines.last() {
409                    if let Some(line_info) = ctx.line_info(last_item_line) {
410                        if let Some(list_item) = &line_info.list_item {
411                            if list_item.is_ordered {
412                                list_item.marker.len() + 1 // Add 1 for space after ordered markers
413                            } else {
414                                2 // Unordered lists need at least 2 spaces
415                            }
416                        } else {
417                            3 // Fallback
418                        }
419                    } else {
420                        3 // Fallback
421                    }
422                } else {
423                    3 // Fallback
424                }
425            } else {
426                3 // Fallback
427            };
428
429        for line_num in (end_line + 1)..start_line {
430            if let Some(line_info) = ctx.line_info(line_num) {
431                let trimmed = line_info.content.trim();
432
433                // Skip empty lines
434                if trimmed.is_empty() {
435                    continue;
436                }
437
438                // Enhanced code block analysis
439                if line_info.in_code_block || trimmed.starts_with("```") || trimmed.starts_with("~~~") {
440                    // Check if this is a standalone code block that should separate lists
441                    if line_info.in_code_block {
442                        // Use the new classification system to determine if this code block separates lists
443                        let context = crate::utils::code_block_utils::CodeBlockUtils::analyze_code_block_context(
444                            &ctx.lines,
445                            line_num - 1,
446                            min_continuation_indent,
447                        );
448
449                        // If it's a standalone code block, lists should be separated
450                        if matches!(context, crate::utils::code_block_utils::CodeBlockContext::Standalone) {
451                            return false; // Lists are separated, not continuous
452                        }
453                    }
454                    continue; // Other code block lines (indented/adjacent) don't break continuity
455                }
456
457                // If there's a heading, lists are definitely separated
458                if line_info.heading.is_some() {
459                    return false;
460                }
461
462                // Any other non-empty content means lists are truly separated
463                return false;
464            }
465        }
466
467        true
468    }
469
470    /// Check if there are any headings between two list blocks
471    fn has_heading_between_blocks(
472        &self,
473        ctx: &crate::lint_context::LintContext,
474        end_line: usize,
475        start_line: usize,
476    ) -> bool {
477        if end_line >= start_line {
478            return false;
479        }
480
481        for line_num in (end_line + 1)..start_line {
482            if let Some(line_info) = ctx.line_info(line_num)
483                && line_info.heading.is_some()
484            {
485                return true;
486            }
487        }
488
489        false
490    }
491
492    /// Find the closest parent list item for an ordered item (can be ordered or unordered)
493    /// Returns the line number of the parent, or 0 if no parent found
494    fn find_parent_list_item(
495        &self,
496        ctx: &crate::lint_context::LintContext,
497        ordered_line: usize,
498        ordered_indent: usize,
499    ) -> usize {
500        // Look backward from the ordered item to find its closest parent
501        for line_num in (1..ordered_line).rev() {
502            if let Some(line_info) = ctx.line_info(line_num) {
503                if let Some(list_item) = &line_info.list_item {
504                    // Found a list item - check if it could be the parent
505                    if list_item.marker_column < ordered_indent {
506                        // This list item is at a lower indentation, so it's the parent
507                        return line_num;
508                    }
509                }
510                // If we encounter non-blank, non-list content at column 0, stop looking
511                else if !line_info.is_blank && line_info.indent == 0 {
512                    break;
513                }
514            }
515        }
516        0 // No parent found
517    }
518
519    /// Check a group of ordered list blocks that should be treated as continuous
520    fn check_ordered_list_group(
521        &self,
522        ctx: &crate::lint_context::LintContext,
523        group: &[&crate::lint_context::ListBlock],
524        warnings: &mut Vec<LintWarning>,
525    ) {
526        // Collect all items from all blocks in the group
527        let mut all_items = Vec::new();
528
529        for list_block in group {
530            // First, check for lazy continuation in this block
531            self.check_for_lazy_continuation(ctx, list_block, warnings);
532
533            for &item_line in &list_block.item_lines {
534                if let Some(line_info) = ctx.line_info(item_line)
535                    && let Some(list_item) = &line_info.list_item
536                {
537                    // Skip unordered lists (safety check)
538                    if !list_item.is_ordered {
539                        continue;
540                    }
541                    all_items.push((item_line, line_info, list_item));
542                }
543            }
544        }
545
546        // Sort by line number to ensure correct order
547        all_items.sort_by_key(|(line_num, _, _)| *line_num);
548
549        // Group items by indentation level AND parent context
550        // Use (indent_level, parent_line) as the key to separate sequences under different parents
551        type LevelGroups<'a> = std::collections::HashMap<
552            (usize, usize),
553            Vec<(
554                usize,
555                &'a crate::lint_context::LineInfo,
556                &'a crate::lint_context::ListItemInfo,
557            )>,
558        >;
559        let mut level_groups: LevelGroups = std::collections::HashMap::new();
560
561        for (line_num, line_info, list_item) in all_items {
562            // Find the closest parent list item (ordered or unordered) for this ordered item
563            let parent_line = self.find_parent_list_item(ctx, line_num, list_item.marker_column);
564
565            // Group by both marker column (indentation level) and parent context
566            level_groups
567                .entry((list_item.marker_column, parent_line))
568                .or_default()
569                .push((line_num, line_info, list_item));
570        }
571
572        // Process each indentation level and parent context separately
573        for ((_indent, _parent), mut group) in level_groups {
574            // Sort by line number to ensure correct order
575            group.sort_by_key(|(line_num, _, _)| *line_num);
576
577            // Check each item in the group for correct sequence
578            for (idx, (line_num, line_info, list_item)) in group.iter().enumerate() {
579                // Parse the actual number from the marker (e.g., "1." -> 1)
580                if let Some(actual_num) = Self::parse_marker_number(&list_item.marker) {
581                    let expected_num = self.get_expected_number(idx);
582
583                    if actual_num != expected_num {
584                        // Calculate byte position for the fix
585                        let marker_start = line_info.byte_offset + list_item.marker_column;
586                        // Use the actual marker length (e.g., "05" is 2 chars, not 1)
587                        let number_len = if let Some(dot_pos) = list_item.marker.find('.') {
588                            dot_pos // Length up to the dot
589                        } else if let Some(paren_pos) = list_item.marker.find(')') {
590                            paren_pos // Length up to the paren
591                        } else {
592                            list_item.marker.len() // Fallback to full marker length
593                        };
594
595                        warnings.push(LintWarning {
596                            rule_name: Some(self.name()),
597                            message: format!(
598                                "Ordered list item number {actual_num} does not match style (expected {expected_num})"
599                            ),
600                            line: *line_num,
601                            column: list_item.marker_column + 1,
602                            end_line: *line_num,
603                            end_column: list_item.marker_column + number_len + 1,
604                            severity: Severity::Warning,
605                            fix: Some(Fix {
606                                range: marker_start..marker_start + number_len,
607                                replacement: expected_num.to_string(),
608                            }),
609                        });
610                    }
611                }
612            }
613        }
614    }
615}
616
#[cfg(test)]
mod tests {
    use super::*;

    use crate::utils::document_structure::DocumentStructure;

    /// Runs `rule` on `content` through the structure-aware entry point and
    /// returns the warnings it produced.
    fn lint(rule: &MD029OrderedListPrefix, content: &str) -> Vec<LintWarning> {
        let structure = DocumentStructure::new(content);
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check_with_structure(&ctx, &structure).unwrap()
    }

    #[test]
    fn test_with_document_structure() {
        // Default style: sequential numbering starting at 1.
        let ordered = MD029OrderedListPrefix::default();

        let clean = lint(&ordered, "1. First item\n2. Second item\n3. Third item");
        assert!(clean.is_empty());

        // Items "3." and "5." are out of sequence.
        let skipped = lint(&ordered, "1. First item\n3. Third item\n5. Fifth item");
        assert_eq!(skipped.len(), 2);

        // One-one style: every item after the first is wrong here.
        let one_one = MD029OrderedListPrefix::new(ListStyle::OneOne);
        let sequential = lint(&one_one, "1. First item\n2. Second item\n3. Third item");
        assert_eq!(sequential.len(), 2);

        // Ordered0 style: numbering starts at zero.
        let ordered0 = MD029OrderedListPrefix::new(ListStyle::Ordered0);
        let zero_based = lint(&ordered0, "0. First item\n1. Second item\n2. Third item");
        assert!(zero_based.is_empty());
    }

    #[test]
    fn test_redundant_computation_fix() {
        // Out-of-order numbers must not panic and must each be reported with
        // the actual and expected value in the message.
        let rule = MD029OrderedListPrefix::default();

        let result = lint(&rule, "1. First item\n3. Wrong number\n2. Another wrong number");
        assert_eq!(result.len(), 2);

        assert!(result[0].message.contains("3 does not match style (expected 2)"));
        assert!(result[1].message.contains("2 does not match style (expected 3)"));
    }

    #[test]
    fn test_performance_improvement() {
        // A 100-item list in which every number is off by one.
        let rule = MD029OrderedListPrefix::default();

        let content: String = (1..=100).map(|i| format!("{}. Item {}\n", i + 1, i)).collect();

        let result = lint(&rule, &content);
        assert_eq!(result.len(), 100);

        // First and last warnings bracket the whole list.
        assert!(result[0].message.contains("2 does not match style (expected 1)"));
        assert!(result[99].message.contains("101 does not match style (expected 100)"));
    }
}