rumdl_lib/rules/
md006_start_bullets.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
3
/// Rule MD006: Consider starting bulleted lists at the leftmost column
///
/// See [docs/md006.md](../../docs/md006.md) for full documentation, configuration, and examples.
///
/// In standard Markdown:
/// - Top-level bullet items should start at column 0 (no indentation)
/// - Nested bullet items should be indented under their parent
/// - A bullet item following non-list content should start a new list at column 0
///
/// Only unordered (bullet) items are examined: ordered items and items inside
/// blockquotes are skipped. A bullet that has an ordered item among its
/// ancestors is accepted when its marker is indented by at least 3 columns.
///
/// The rule is a stateless unit struct: `from_config` ignores the supplied
/// configuration and `default_config_section` returns `None`.
#[derive(Clone)]
pub struct MD006StartBullets;
14
15impl MD006StartBullets {
16    /// Check if a bullet is nested under an ordered list item (anywhere in the hierarchy)
17    fn is_nested_under_ordered_item(
18        &self,
19        ctx: &crate::lint_context::LintContext,
20        current_line: usize,
21        current_indent: usize,
22    ) -> bool {
23        // Look backward from current line to find any ordered ancestor
24        let mut check_indent = current_indent;
25
26        for line_idx in (1..current_line).rev() {
27            if let Some(line_info) = ctx.line_info(line_idx) {
28                if let Some(list_item) = &line_info.list_item {
29                    // Found a list item - check if it's at a lower indentation (ancestor level)
30                    if list_item.marker_column < check_indent {
31                        // This is an ancestor item
32                        if list_item.is_ordered {
33                            // Found an ordered ancestor
34                            return true;
35                        }
36                        // Continue looking for higher-level ancestors
37                        check_indent = list_item.marker_column;
38                    }
39                }
40                // If we encounter non-blank, non-list content at column 0, stop looking
41                else if !line_info.is_blank && line_info.indent == 0 {
42                    break;
43                }
44            }
45        }
46        false
47    }
48
49    /// Optimized check using centralized list blocks
50    fn check_optimized(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
51        let content = ctx.content;
52        let line_index = &ctx.line_index;
53        let mut result = Vec::new();
54        let lines: Vec<&str> = content.lines().collect();
55
56        // Track which lines contain valid bullet items
57        let mut valid_bullet_lines = vec![false; lines.len()];
58
59        // Process each list block
60        for list_block in &ctx.list_blocks {
61            // Check each list item in this block
62            // We need to check unordered items even in mixed lists
63            for &item_line in &list_block.item_lines {
64                if let Some(line_info) = ctx.line_info(item_line)
65                    && let Some(list_item) = &line_info.list_item
66                {
67                    // Skip ordered list items - we only care about unordered ones
68                    if list_item.is_ordered {
69                        continue;
70                    }
71
72                    // Skip list items inside blockquotes - they're supposed to be indented
73                    if line_info.blockquote.is_some() {
74                        continue;
75                    }
76
77                    let line_idx = item_line - 1;
78                    let indent = list_item.marker_column;
79                    let line = &lines[line_idx];
80
81                    let mut is_valid = false;
82
83                    if indent == 0 {
84                        // Top-level items are always valid
85                        is_valid = true;
86                    } else {
87                        // Check if this is nested under an ordered item with correct indentation
88                        // For single-digit ordered lists (1.), need at least 3 spaces for proper nesting
89                        // For double-digit (10.), need at least 4 spaces, etc.
90                        // But MD006's purpose is to flag top-level indented lists, not validate nesting depth
91                        if self.is_nested_under_ordered_item(ctx, item_line, indent) {
92                            // It's nested under an ordered item
93                            // Only flag if indentation is less than 3 (won't nest properly in CommonMark)
94                            if indent >= 3 {
95                                is_valid = true;
96                            }
97                        } else {
98                            // Check if this is a valid nested item under another bullet
99                            match Self::find_relevant_previous_bullet(&lines, line_idx) {
100                                Some((prev_idx, prev_indent)) => {
101                                    match prev_indent.cmp(&indent) {
102                                        std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {
103                                            // Valid nesting or sibling if previous item was valid
104                                            is_valid = valid_bullet_lines[prev_idx];
105                                        }
106                                        std::cmp::Ordering::Greater => {
107                                            // remains invalid
108                                        }
109                                    }
110                                }
111                                None => {
112                                    // Indented item with no previous bullet remains invalid
113                                }
114                            }
115                        }
116                    }
117
118                    valid_bullet_lines[line_idx] = is_valid;
119
120                    if !is_valid {
121                        // Calculate the precise range for the indentation that needs to be removed
122                        let start_col = 1;
123                        let end_col = indent + 3; // Include marker and space after it
124
125                        // For the fix, we need to replace the highlighted part with just the bullet marker
126                        let trimmed = line.trim_start();
127                        let bullet_part = if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(trimmed) {
128                            let marker = captures.get(2).map_or("*", |m| m.as_str());
129                            format!("{marker} ")
130                        } else {
131                            "* ".to_string()
132                        };
133
134                        // Calculate the byte range for the fix
135                        let fix_range =
136                            line_index.line_col_to_byte_range_with_length(item_line, start_col, end_col - start_col);
137
138                        // Generate appropriate message based on context
139                        let message = if self.is_nested_under_ordered_item(ctx, item_line, indent) {
140                            // It's trying to nest under an ordered item but has insufficient indentation
141                            format!(
142                                "Nested list needs at least 3 spaces of indentation under ordered item (found {indent})"
143                            )
144                        } else if indent > 0 {
145                            // It's indented but not nested under anything - should start at column 0
146                            format!(
147                                "Consider starting bulleted lists at the beginning of the line (found {indent} leading spaces)"
148                            )
149                        } else {
150                            // Shouldn't happen, but just in case
151                            format!("List indentation issue (found {indent} leading spaces)")
152                        };
153
154                        result.push(LintWarning {
155                            line: item_line,
156                            column: start_col,
157                            end_line: item_line,
158                            end_column: end_col,
159                            message,
160                            severity: Severity::Warning,
161                            rule_name: Some(self.name().to_string()),
162                            fix: Some(Fix {
163                                range: fix_range,
164                                replacement: bullet_part,
165                            }),
166                        });
167                    }
168                }
169            }
170        }
171
172        Ok(result)
173    }
174    /// Checks if a line is a bullet list item and returns its indentation level
175    fn is_bullet_list_item(line: &str) -> Option<usize> {
176        if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(line)
177            && let Some(indent) = captures.get(1)
178        {
179            return Some(indent.as_str().len());
180        }
181        None
182    }
183
184    /// Checks if a line is blank (empty or whitespace only)
185    fn is_blank_line(line: &str) -> bool {
186        line.trim().is_empty()
187    }
188
189    /// Find the most relevant previous bullet item for nesting validation
190    fn find_relevant_previous_bullet(lines: &[&str], line_idx: usize) -> Option<(usize, usize)> {
191        let current_indent = Self::is_bullet_list_item(lines[line_idx])?;
192
193        let mut i = line_idx;
194
195        while i > 0 {
196            i -= 1;
197            if Self::is_blank_line(lines[i]) {
198                continue;
199            }
200            if let Some(prev_indent) = Self::is_bullet_list_item(lines[i]) {
201                if prev_indent <= current_indent {
202                    // Found a potential parent or sibling
203                    // Check if there's any non-list content between this potential parent and current item
204                    let mut has_breaking_content = false;
205                    for check_line in &lines[(i + 1)..line_idx] {
206                        if Self::is_blank_line(check_line) {
207                            continue;
208                        }
209                        if Self::is_bullet_list_item(check_line).is_none() {
210                            // Found non-list content - check if it breaks the list structure
211                            let content_indent = check_line.len() - check_line.trim_start().len();
212
213                            // Content is acceptable if:
214                            // 1. It's indented at least as much as the current item (continuation of parent)
215                            // 2. OR it's indented more than the previous bullet (continuation of previous item)
216                            // 3. AND we have a true parent relationship (prev_indent < current_indent)
217                            let is_continuation = content_indent >= prev_indent.max(2); // At least 2 spaces for continuation
218                            let is_valid_nesting = prev_indent < current_indent;
219
220                            if !is_continuation || !is_valid_nesting {
221                                has_breaking_content = true;
222                                break;
223                            }
224                        }
225                    }
226
227                    if !has_breaking_content {
228                        return Some((i, prev_indent));
229                    } else {
230                        // Content breaks the list structure, but continue searching for an earlier valid parent
231                        continue;
232                    }
233                }
234                // If prev_indent > current_indent, it's a child of a sibling, ignore it and keep searching.
235            } else {
236                // Found non-list content - check if it's a continuation line
237                let content_indent = lines[i].len() - lines[i].trim_start().len();
238                // If it's indented enough to be a continuation, don't break the search
239                if content_indent >= 2 {
240                    continue;
241                }
242                // Otherwise, this breaks the search
243                return None;
244            }
245        }
246        None
247    }
248}
249
250impl Rule for MD006StartBullets {
    /// Returns the rule identifier, `"MD006"`.
    fn name(&self) -> &'static str {
        "MD006"
    }
254
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Consider starting bulleted lists at the beginning of the line"
    }
258
259    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
260        let content = ctx.content;
261
262        // Early returns for performance
263        if content.is_empty() || ctx.list_blocks.is_empty() {
264            return Ok(Vec::new());
265        }
266
267        // Quick check for any list markers before processing
268        if !content.contains('*') && !content.contains('-') && !content.contains('+') {
269            return Ok(Vec::new());
270        }
271
272        // Use centralized list blocks for better performance and consistency
273        self.check_optimized(ctx)
274    }
275
276    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
277        let content = ctx.content;
278        let _line_index = &ctx.line_index;
279
280        let warnings = self.check(ctx)?;
281        if warnings.is_empty() {
282            return Ok(content.to_string());
283        }
284
285        let lines: Vec<&str> = content.lines().collect();
286
287        let mut fixed_lines: Vec<String> = Vec::with_capacity(lines.len());
288
289        // Create a map of line numbers to replacements
290
291        let mut line_replacements = std::collections::HashMap::new();
292        for warning in warnings {
293            if let Some(fix) = warning.fix {
294                // Line number is 1-based in warnings but we need 0-based for indexing
295                let line_idx = warning.line - 1;
296                line_replacements.insert(line_idx, fix.replacement);
297            }
298        }
299
300        // Apply replacements line by line
301
302        let mut i = 0;
303        while i < lines.len() {
304            if let Some(_replacement) = line_replacements.get(&i) {
305                let prev_line_is_blank = i > 0 && Self::is_blank_line(lines[i - 1]);
306                let prev_line_is_list = i > 0 && Self::is_bullet_list_item(lines[i - 1]).is_some();
307                // Only insert a blank line if previous line is not blank and not a list
308                if !prev_line_is_blank && !prev_line_is_list && i > 0 {
309                    fixed_lines.push(String::new());
310                }
311                // The replacement is the fixed line (unindented list item)
312                // Use the original line, trimmed of leading whitespace
313                let fixed_line = lines[i].trim_start();
314                fixed_lines.push(fixed_line.to_string());
315            } else {
316                fixed_lines.push(lines[i].to_string());
317            }
318            i += 1;
319        }
320
321        // Join the lines with newlines
322
323        let result = fixed_lines.join("\n");
324        if content.ends_with('\n') {
325            Ok(result + "\n")
326        } else {
327            Ok(result)
328        }
329    }
330
    /// Get the category of this rule for selective processing
    ///
    /// Always returns `RuleCategory::List`.
    fn category(&self) -> RuleCategory {
        RuleCategory::List
    }
335
    /// Check if this rule should be skipped
    ///
    /// Returns `true` when the content is empty or when
    /// `ctx.likely_has_lists()` reports that the document has no lists.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        ctx.content.is_empty() || !ctx.likely_has_lists()
    }
340
    /// Returns this rule as a `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
344
    /// Builds a boxed instance of the rule.
    ///
    /// The supplied configuration is not read: the rule is always constructed
    /// as the plain unit struct.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD006StartBullets)
    }
351
    /// Returns `None`: this rule provides no default configuration section.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        None
    }
355}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs MD006 over `content` using the standard Markdown flavor and returns its warnings.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD006StartBullets.check(&ctx).unwrap()
    }

    #[test]
    fn test_with_lint_context() {
        // Properly formatted lists produce no warnings
        let result_valid = lint("* Item 1\n* Item 2\n  * Nested item\n  * Another nested item");
        assert!(
            result_valid.is_empty(),
            "Properly formatted lists should not generate warnings, found: {result_valid:?}"
        );

        // A list that starts indented: both top-level items and the nested one are flagged
        let result = lint("  * Item 1\n  * Item 2\n    * Nested item");
        assert!(!result.is_empty(), "Improperly indented lists should generate warnings");
        assert_eq!(
            result.len(),
            3,
            "Should generate warnings for all improperly indented items (2 top-level + 1 nested)"
        );

        // Standard nesting under a column-0 bullet is valid
        let result = lint("* Item 1\n  * Item 2 (standard nesting is valid)");
        assert!(
            result.is_empty(),
            "Standard nesting (* Item ->   * Item) should NOT generate warnings, found: {result:?}"
        );
    }

    #[test]
    fn test_bullets_nested_under_numbered_items() {
        // Bullets indented by 3 spaces under numbered items, with a blank line between groups
        let result = lint(
            "1. **Active Directory/LDAP**\n   - User authentication and directory services\n   - LDAP for user information and validation\n\n2. **Oracle Unified Directory (OUD)**\n   - Extended user directory services",
        );
        // 3 spaces is valid for bullets under numbered items
        assert!(
            result.is_empty(),
            "Expected no warnings for bullets with 3 spaces under numbered items, got: {result:?}"
        );
    }

    #[test]
    fn test_bullets_nested_under_numbered_items_wrong_indent() {
        // Line 2 is indented by 2 spaces and line 3 by 1 space: both are below the 3-space minimum
        let result = lint("1. **Active Directory/LDAP**\n  - Wrong: only 2 spaces\n - Wrong: only 1 space");
        assert_eq!(
            result.len(),
            2,
            "Expected warnings for bullets with insufficient spacing under numbered items"
        );
        assert!(result.iter().any(|w| w.line == 2));
        assert!(result.iter().any(|w| w.line == 3));
    }

    #[test]
    fn test_regular_bullet_nesting_still_works() {
        // Plain bullet nesting in 2-space steps must remain warning-free
        let result = lint("* Top level\n  * Nested bullet (2 spaces is correct)\n    * Deeply nested (4 spaces)");
        assert!(
            result.is_empty(),
            "Expected no warnings for standard bullet nesting, got: {result:?}"
        );
    }
}
452}