rumdl_lib/rules/
md006_start_bullets.rs

1use crate::utils::range_utils::LineIndex;
2
3use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
4use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
5use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
6
/// Rule MD006: Consider starting bulleted lists at the leftmost column
///
/// See [docs/md006.md](../../docs/md006.md) for full documentation, configuration, and examples.
///
/// In standard Markdown:
/// - Top-level bullet items should start at column 0 (no indentation)
/// - Nested bullet items should be indented under their parent
/// - A bullet item following non-list content should start a new list at column 0
///
/// The rule is a unit struct with no fields, so an instance carries no state or options.
#[derive(Clone)]
pub struct MD006StartBullets;
17
18impl MD006StartBullets {
19    /// Check if a bullet is nested under an ordered list item (anywhere in the hierarchy)
20    fn is_nested_under_ordered_item(
21        &self,
22        ctx: &crate::lint_context::LintContext,
23        current_line: usize,
24        current_indent: usize,
25    ) -> bool {
26        // Look backward from current line to find any ordered ancestor
27        let mut check_indent = current_indent;
28
29        for line_idx in (1..current_line).rev() {
30            if let Some(line_info) = ctx.line_info(line_idx) {
31                if let Some(list_item) = &line_info.list_item {
32                    // Found a list item - check if it's at a lower indentation (ancestor level)
33                    if list_item.marker_column < check_indent {
34                        // This is an ancestor item
35                        if list_item.is_ordered {
36                            // Found an ordered ancestor
37                            return true;
38                        }
39                        // Continue looking for higher-level ancestors
40                        check_indent = list_item.marker_column;
41                    }
42                }
43                // If we encounter non-blank, non-list content at column 0, stop looking
44                else if !line_info.is_blank && line_info.indent == 0 {
45                    break;
46                }
47            }
48        }
49        false
50    }
51
52    /// Optimized check using centralized list blocks
53    fn check_optimized(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
54        let content = ctx.content;
55        let line_index = LineIndex::new(content.to_string());
56        let mut result = Vec::new();
57        let lines: Vec<&str> = content.lines().collect();
58
59        // Track which lines contain valid bullet items
60        let mut valid_bullet_lines = vec![false; lines.len()];
61
62        // Process each list block
63        for list_block in &ctx.list_blocks {
64            // Check each list item in this block
65            // We need to check unordered items even in mixed lists
66            for &item_line in &list_block.item_lines {
67                if let Some(line_info) = ctx.line_info(item_line)
68                    && let Some(list_item) = &line_info.list_item
69                {
70                    // Skip ordered list items - we only care about unordered ones
71                    if list_item.is_ordered {
72                        continue;
73                    }
74                    let line_idx = item_line - 1;
75                    let indent = list_item.marker_column;
76                    let line = &lines[line_idx];
77
78                    let mut is_valid = false;
79
80                    if indent == 0 {
81                        // Top-level items are always valid
82                        is_valid = true;
83                    } else {
84                        // Check if this is nested under an ordered item with correct indentation
85                        // For single-digit ordered lists (1.), need at least 3 spaces for proper nesting
86                        // For double-digit (10.), need at least 4 spaces, etc.
87                        // But MD006's purpose is to flag top-level indented lists, not validate nesting depth
88                        if self.is_nested_under_ordered_item(ctx, item_line, indent) {
89                            // It's nested under an ordered item
90                            // Only flag if indentation is less than 3 (won't nest properly in CommonMark)
91                            if indent >= 3 {
92                                is_valid = true;
93                            }
94                        } else {
95                            // Check if this is a valid nested item under another bullet
96                            match Self::find_relevant_previous_bullet(&lines, line_idx) {
97                                Some((prev_idx, prev_indent)) => {
98                                    match prev_indent.cmp(&indent) {
99                                        std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {
100                                            // Valid nesting or sibling if previous item was valid
101                                            is_valid = valid_bullet_lines[prev_idx];
102                                        }
103                                        std::cmp::Ordering::Greater => {
104                                            // remains invalid
105                                        }
106                                    }
107                                }
108                                None => {
109                                    // Indented item with no previous bullet remains invalid
110                                }
111                            }
112                        }
113                    }
114
115                    valid_bullet_lines[line_idx] = is_valid;
116
117                    if !is_valid {
118                        // Calculate the precise range for the indentation that needs to be removed
119                        let start_col = 1;
120                        let end_col = indent + 3; // Include marker and space after it
121
122                        // For the fix, we need to replace the highlighted part with just the bullet marker
123                        let trimmed = line.trim_start();
124                        let bullet_part = if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(trimmed) {
125                            let marker = captures.get(2).map_or("*", |m| m.as_str());
126                            format!("{marker} ")
127                        } else {
128                            "* ".to_string()
129                        };
130
131                        // Calculate the byte range for the fix
132                        let fix_range =
133                            line_index.line_col_to_byte_range_with_length(item_line, start_col, end_col - start_col);
134
135                        // Generate appropriate message based on context
136                        let message = if self.is_nested_under_ordered_item(ctx, item_line, indent) {
137                            // It's trying to nest under an ordered item but has insufficient indentation
138                            format!(
139                                "Nested list needs at least 3 spaces of indentation under ordered item (found {indent})"
140                            )
141                        } else if indent > 0 {
142                            // It's indented but not nested under anything - should start at column 0
143                            format!(
144                                "Consider starting bulleted lists at the beginning of the line (found {indent} leading spaces)"
145                            )
146                        } else {
147                            // Shouldn't happen, but just in case
148                            format!("List indentation issue (found {indent} leading spaces)")
149                        };
150
151                        result.push(LintWarning {
152                            line: item_line,
153                            column: start_col,
154                            end_line: item_line,
155                            end_column: end_col,
156                            message,
157                            severity: Severity::Warning,
158                            rule_name: Some(self.name()),
159                            fix: Some(Fix {
160                                range: fix_range,
161                                replacement: bullet_part,
162                            }),
163                        });
164                    }
165                }
166            }
167        }
168
169        Ok(result)
170    }
171    /// Checks if a line is a bullet list item and returns its indentation level
172    fn is_bullet_list_item(line: &str) -> Option<usize> {
173        if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(line)
174            && let Some(indent) = captures.get(1)
175        {
176            return Some(indent.as_str().len());
177        }
178        None
179    }
180
181    /// Checks if a line is blank (empty or whitespace only)
182    fn is_blank_line(line: &str) -> bool {
183        line.trim().is_empty()
184    }
185
186    /// Find the most relevant previous bullet item for nesting validation
187    fn find_relevant_previous_bullet(lines: &[&str], line_idx: usize) -> Option<(usize, usize)> {
188        let current_indent = Self::is_bullet_list_item(lines[line_idx])?;
189
190        let mut i = line_idx;
191
192        while i > 0 {
193            i -= 1;
194            if Self::is_blank_line(lines[i]) {
195                continue;
196            }
197            if let Some(prev_indent) = Self::is_bullet_list_item(lines[i]) {
198                if prev_indent <= current_indent {
199                    // Found a potential parent or sibling
200                    // Check if there's any non-list content between this potential parent and current item
201                    let mut has_breaking_content = false;
202                    for check_line in &lines[(i + 1)..line_idx] {
203                        if Self::is_blank_line(check_line) {
204                            continue;
205                        }
206                        if Self::is_bullet_list_item(check_line).is_none() {
207                            // Found non-list content - check if it breaks the list structure
208                            let content_indent = check_line.len() - check_line.trim_start().len();
209
210                            // Content is acceptable if:
211                            // 1. It's indented at least as much as the current item (continuation of parent)
212                            // 2. OR it's indented more than the previous bullet (continuation of previous item)
213                            // 3. AND we have a true parent relationship (prev_indent < current_indent)
214                            let is_continuation = content_indent >= prev_indent.max(2); // At least 2 spaces for continuation
215                            let is_valid_nesting = prev_indent < current_indent;
216
217                            if !is_continuation || !is_valid_nesting {
218                                has_breaking_content = true;
219                                break;
220                            }
221                        }
222                    }
223
224                    if !has_breaking_content {
225                        return Some((i, prev_indent));
226                    } else {
227                        // Content breaks the list structure, but continue searching for an earlier valid parent
228                        continue;
229                    }
230                }
231                // If prev_indent > current_indent, it's a child of a sibling, ignore it and keep searching.
232            } else {
233                // Found non-list content - check if it's a continuation line
234                let content_indent = lines[i].len() - lines[i].trim_start().len();
235                // If it's indented enough to be a continuation, don't break the search
236                if content_indent >= 2 {
237                    continue;
238                }
239                // Otherwise, this breaks the search
240                return None;
241            }
242        }
243        None
244    }
245}
246
247impl Rule for MD006StartBullets {
    /// Returns the identifier of this rule, `"MD006"`.
    fn name(&self) -> &'static str {
        "MD006"
    }
251
    /// Returns the one-line, human-readable summary of what this rule recommends.
    fn description(&self) -> &'static str {
        "Consider starting bulleted lists at the beginning of the line"
    }
255
256    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
257        let content = ctx.content;
258
259        // Early returns for performance
260        if content.is_empty() || ctx.list_blocks.is_empty() {
261            return Ok(Vec::new());
262        }
263
264        // Quick check for any list markers before processing
265        if !content.contains('*') && !content.contains('-') && !content.contains('+') {
266            return Ok(Vec::new());
267        }
268
269        // Use centralized list blocks for better performance and consistency
270        self.check_optimized(ctx)
271    }
272
273    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
274        let content = ctx.content;
275        let _line_index = LineIndex::new(content.to_string());
276
277        let warnings = self.check(ctx)?;
278        if warnings.is_empty() {
279            return Ok(content.to_string());
280        }
281
282        let lines: Vec<&str> = content.lines().collect();
283
284        let mut fixed_lines: Vec<String> = Vec::with_capacity(lines.len());
285
286        // Create a map of line numbers to replacements
287
288        let mut line_replacements = std::collections::HashMap::new();
289        for warning in warnings {
290            if let Some(fix) = warning.fix {
291                // Line number is 1-based in warnings but we need 0-based for indexing
292                let line_idx = warning.line - 1;
293                line_replacements.insert(line_idx, fix.replacement);
294            }
295        }
296
297        // Apply replacements line by line
298
299        let mut i = 0;
300        while i < lines.len() {
301            if let Some(_replacement) = line_replacements.get(&i) {
302                let prev_line_is_blank = i > 0 && Self::is_blank_line(lines[i - 1]);
303                let prev_line_is_list = i > 0 && Self::is_bullet_list_item(lines[i - 1]).is_some();
304                // Only insert a blank line if previous line is not blank and not a list
305                if !prev_line_is_blank && !prev_line_is_list && i > 0 {
306                    fixed_lines.push(String::new());
307                }
308                // The replacement is the fixed line (unindented list item)
309                // Use the original line, trimmed of leading whitespace
310                let fixed_line = lines[i].trim_start();
311                fixed_lines.push(fixed_line.to_string());
312            } else {
313                fixed_lines.push(lines[i].to_string());
314            }
315            i += 1;
316        }
317
318        // Join the lines with newlines
319
320        let result = fixed_lines.join("\n");
321        if content.ends_with('\n') {
322            Ok(result + "\n")
323        } else {
324            Ok(result)
325        }
326    }
327
328    /// Optimized check using document structure
329    fn check_with_structure(
330        &self,
331        _ctx: &crate::lint_context::LintContext,
332        doc_structure: &DocumentStructure,
333    ) -> LintResult {
334        let content = _ctx.content;
335        if doc_structure.list_lines.is_empty() {
336            return Ok(Vec::new());
337        }
338        if !content.contains('*') && !content.contains('-') && !content.contains('+') {
339            return Ok(Vec::new());
340        }
341        let line_index = LineIndex::new(content.to_string());
342        let mut result = Vec::new();
343        let lines: Vec<&str> = content.lines().collect();
344        let mut valid_bullet_lines = vec![false; lines.len()];
345        for &line_num in &doc_structure.list_lines {
346            let line_idx = line_num - 1;
347            if line_idx >= lines.len() {
348                continue;
349            }
350            let line = lines[line_idx];
351            if doc_structure.is_in_code_block(line_num) {
352                continue;
353            }
354            if let Some(indent) = Self::is_bullet_list_item(line) {
355                let mut is_valid = false; // Assume invalid initially
356                if indent == 0 {
357                    is_valid = true;
358                } else {
359                    match Self::find_relevant_previous_bullet(&lines, line_idx) {
360                        Some((prev_idx, prev_indent)) => {
361                            match prev_indent.cmp(&indent) {
362                                std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {
363                                    // Valid nesting or sibling if previous item was valid
364                                    is_valid = valid_bullet_lines[prev_idx];
365                                }
366                                std::cmp::Ordering::Greater => {
367                                    // remains invalid
368                                }
369                            }
370                        }
371                        None => {
372                            // Indented item with no previous bullet remains invalid
373                        }
374                    }
375                }
376                valid_bullet_lines[line_idx] = is_valid;
377
378                if !is_valid {
379                    // Calculate the precise range for the indentation that needs to be removed
380                    // For "  * Indented bullet", we want to highlight the indentation, marker, and space after marker "  * " (columns 1-4)
381                    let start_col = 1; // Start from beginning of line
382                    let end_col = indent + 3; // Include marker and space after it (indent + 1 for marker + 1 for space + 1 for inclusive range)
383
384                    // For the fix, we need to replace the highlighted part ("  *") with just the bullet marker ("* ")
385                    let line = lines[line_idx];
386                    let trimmed = line.trim_start();
387                    // Extract just the bullet marker and normalize to single space
388                    let bullet_part = if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(trimmed) {
389                        format!("{} ", captures.get(2).unwrap().as_str()) // Always use single space
390                    } else {
391                        "* ".to_string() // fallback
392                    };
393                    let replacement = bullet_part;
394
395                    result.push(LintWarning {
396                        rule_name: Some(self.name()),
397                        severity: Severity::Warning,
398                        line: line_num,
399                        column: start_col,
400                        end_line: line_num,
401                        end_column: end_col,
402                        message: "List item indentation".to_string(),
403                        fix: Some(Fix {
404                            range: {
405                                let start_byte = line_index.line_col_to_byte_range(line_num, start_col).start;
406                                let end_byte = line_index.line_col_to_byte_range(line_num, end_col).start;
407                                start_byte..end_byte
408                            },
409                            replacement,
410                        }),
411                    });
412                }
413            }
414        }
415        Ok(result)
416    }
417
    /// Get the category of this rule for selective processing.
    ///
    /// Always returns `RuleCategory::List`.
    fn category(&self) -> RuleCategory {
        RuleCategory::List
    }
422
423    /// Check if this rule should be skipped
424    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
425        let content = ctx.content;
426        content.is_empty() || (!content.contains('*') && !content.contains('-') && !content.contains('+'))
427    }
428
    /// Returns `self` as a `&dyn Any`.
    // NOTE(review): presumably used by callers to downcast to the concrete rule type — confirm.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
432
    /// Always returns `None`.
    // NOTE(review): this type implements `check_with_structure` and
    // `DocumentStructureExtensions`, yet opts out here; presumably this keeps callers on the
    // plain `check` path — confirm this is intentional.
    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
        None
    }
436
    /// Builds a boxed instance of the rule.
    ///
    /// The supplied configuration is not read: the rule is a unit struct with no options.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD006StartBullets)
    }
443
    /// Always returns `None`: this rule supplies no default configuration section.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        None
    }
447}
448
449impl DocumentStructureExtensions for MD006StartBullets {
450    fn has_relevant_elements(
451        &self,
452        ctx: &crate::lint_context::LintContext,
453        _doc_structure: &DocumentStructure,
454    ) -> bool {
455        // This rule is only relevant if there are unordered list items
456        ctx.list_blocks.iter().any(|block| !block.is_ordered)
457    }
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `content` through `Rule::check` using the standard Markdown flavor.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD006StartBullets.check(&ctx).unwrap()
    }

    /// Lints `content` through `Rule::check_with_structure` using the standard Markdown flavor.
    fn lint_with_structure(content: &str) -> Vec<LintWarning> {
        let structure = DocumentStructure::new(content);
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD006StartBullets.check_with_structure(&ctx, &structure).unwrap()
    }

    #[test]
    fn test_with_document_structure() {
        // Properly formatted lists: top-level bullets with correctly nested children.
        let result_valid = lint_with_structure("* Item 1\n* Item 2\n  * Nested item\n  * Another nested item");
        assert!(
            result_valid.is_empty(),
            "Properly formatted lists should not generate warnings, found: {result_valid:?}"
        );

        // A list whose top-level bullets are indented: both top-level items are flagged.
        let result = lint_with_structure("  * Item 1\n  * Item 2\n    * Nested item");
        assert!(!result.is_empty(), "Improperly indented lists should generate warnings");
        assert_eq!(
            result.len(),
            2,
            "Should generate warnings for both improperly indented top-level items"
        );

        // Standard nesting under an unindented parent is valid.
        let result = lint_with_structure("* Item 1\n  * Item 2 (standard nesting is valid)");
        assert!(
            result.is_empty(),
            "Standard nesting (* Item ->   * Item) should NOT generate warnings, found: {result:?}"
        );
    }

    #[test]
    fn test_bullets_nested_under_numbered_items() {
        // Bullets indented by 3 spaces under numbered items are accepted.
        let result = lint(
            "\
1. **Active Directory/LDAP**
   - User authentication and directory services
   - LDAP for user information and validation

2. **Oracle Unified Directory (OUD)**
   - Extended user directory services",
        );
        assert!(
            result.is_empty(),
            "Expected no warnings for bullets with 3 spaces under numbered items, got: {result:?}"
        );
    }

    #[test]
    fn test_bullets_nested_under_numbered_items_wrong_indent() {
        // Bullets indented by fewer than 3 spaces under a numbered item are flagged.
        let result = lint(
            "\
1. **Active Directory/LDAP**
  - Wrong: only 2 spaces
 - Wrong: only 1 space",
        );
        assert_eq!(
            result.len(),
            2,
            "Expected warnings for bullets with insufficient spacing under numbered items"
        );
        for flagged_line in [2, 3] {
            assert!(result.iter().any(|w| w.line == flagged_line));
        }
    }

    #[test]
    fn test_regular_bullet_nesting_still_works() {
        // Plain bullet-under-bullet nesting produces no warnings.
        let result = lint(
            "\
* Top level
  * Nested bullet (2 spaces is correct)
    * Deeply nested (4 spaces)",
        );
        assert!(
            result.is_empty(),
            "Expected no warnings for standard bullet nesting, got: {result:?}"
        );
    }
}
558}