rumdl_lib/rules/
md006_start_bullets.rs

1use crate::utils::range_utils::LineIndex;
2
3use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
4use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
5use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
6
/// Rule MD006: Consider starting bulleted lists at the leftmost column
///
/// See [docs/md006.md](../../docs/md006.md) for full documentation, configuration, and examples.
///
/// In standard Markdown:
/// - Top-level bullet items should start at column 0 (no indentation)
/// - Nested bullet items should be indented under their parent
/// - A bullet item following non-list content should start a new list at column 0
///
/// Additional behavior of the list-block based check in this file:
/// - A bullet that has an ordered list item as an ancestor is accepted when its
///   marker is indented by at least 3 columns
/// - Ordered list items and list items inside blockquotes are not checked
///
/// The rule is a stateless unit struct; it carries no configuration.
#[derive(Clone)]
pub struct MD006StartBullets;
17
18impl MD006StartBullets {
19    /// Check if a bullet is nested under an ordered list item (anywhere in the hierarchy)
20    fn is_nested_under_ordered_item(
21        &self,
22        ctx: &crate::lint_context::LintContext,
23        current_line: usize,
24        current_indent: usize,
25    ) -> bool {
26        // Look backward from current line to find any ordered ancestor
27        let mut check_indent = current_indent;
28
29        for line_idx in (1..current_line).rev() {
30            if let Some(line_info) = ctx.line_info(line_idx) {
31                if let Some(list_item) = &line_info.list_item {
32                    // Found a list item - check if it's at a lower indentation (ancestor level)
33                    if list_item.marker_column < check_indent {
34                        // This is an ancestor item
35                        if list_item.is_ordered {
36                            // Found an ordered ancestor
37                            return true;
38                        }
39                        // Continue looking for higher-level ancestors
40                        check_indent = list_item.marker_column;
41                    }
42                }
43                // If we encounter non-blank, non-list content at column 0, stop looking
44                else if !line_info.is_blank && line_info.indent == 0 {
45                    break;
46                }
47            }
48        }
49        false
50    }
51
52    /// Optimized check using centralized list blocks
53    fn check_optimized(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
54        let content = ctx.content;
55        let line_index = LineIndex::new(content.to_string());
56        let mut result = Vec::new();
57        let lines: Vec<&str> = content.lines().collect();
58
59        // Track which lines contain valid bullet items
60        let mut valid_bullet_lines = vec![false; lines.len()];
61
62        // Process each list block
63        for list_block in &ctx.list_blocks {
64            // Check each list item in this block
65            // We need to check unordered items even in mixed lists
66            for &item_line in &list_block.item_lines {
67                if let Some(line_info) = ctx.line_info(item_line)
68                    && let Some(list_item) = &line_info.list_item
69                {
70                    // Skip ordered list items - we only care about unordered ones
71                    if list_item.is_ordered {
72                        continue;
73                    }
74
75                    // Skip list items inside blockquotes - they're supposed to be indented
76                    if line_info.blockquote.is_some() {
77                        continue;
78                    }
79
80                    let line_idx = item_line - 1;
81                    let indent = list_item.marker_column;
82                    let line = &lines[line_idx];
83
84                    let mut is_valid = false;
85
86                    if indent == 0 {
87                        // Top-level items are always valid
88                        is_valid = true;
89                    } else {
90                        // Check if this is nested under an ordered item with correct indentation
91                        // For single-digit ordered lists (1.), need at least 3 spaces for proper nesting
92                        // For double-digit (10.), need at least 4 spaces, etc.
93                        // But MD006's purpose is to flag top-level indented lists, not validate nesting depth
94                        if self.is_nested_under_ordered_item(ctx, item_line, indent) {
95                            // It's nested under an ordered item
96                            // Only flag if indentation is less than 3 (won't nest properly in CommonMark)
97                            if indent >= 3 {
98                                is_valid = true;
99                            }
100                        } else {
101                            // Check if this is a valid nested item under another bullet
102                            match Self::find_relevant_previous_bullet(&lines, line_idx) {
103                                Some((prev_idx, prev_indent)) => {
104                                    match prev_indent.cmp(&indent) {
105                                        std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {
106                                            // Valid nesting or sibling if previous item was valid
107                                            is_valid = valid_bullet_lines[prev_idx];
108                                        }
109                                        std::cmp::Ordering::Greater => {
110                                            // remains invalid
111                                        }
112                                    }
113                                }
114                                None => {
115                                    // Indented item with no previous bullet remains invalid
116                                }
117                            }
118                        }
119                    }
120
121                    valid_bullet_lines[line_idx] = is_valid;
122
123                    if !is_valid {
124                        // Calculate the precise range for the indentation that needs to be removed
125                        let start_col = 1;
126                        let end_col = indent + 3; // Include marker and space after it
127
128                        // For the fix, we need to replace the highlighted part with just the bullet marker
129                        let trimmed = line.trim_start();
130                        let bullet_part = if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(trimmed) {
131                            let marker = captures.get(2).map_or("*", |m| m.as_str());
132                            format!("{marker} ")
133                        } else {
134                            "* ".to_string()
135                        };
136
137                        // Calculate the byte range for the fix
138                        let fix_range =
139                            line_index.line_col_to_byte_range_with_length(item_line, start_col, end_col - start_col);
140
141                        // Generate appropriate message based on context
142                        let message = if self.is_nested_under_ordered_item(ctx, item_line, indent) {
143                            // It's trying to nest under an ordered item but has insufficient indentation
144                            format!(
145                                "Nested list needs at least 3 spaces of indentation under ordered item (found {indent})"
146                            )
147                        } else if indent > 0 {
148                            // It's indented but not nested under anything - should start at column 0
149                            format!(
150                                "Consider starting bulleted lists at the beginning of the line (found {indent} leading spaces)"
151                            )
152                        } else {
153                            // Shouldn't happen, but just in case
154                            format!("List indentation issue (found {indent} leading spaces)")
155                        };
156
157                        result.push(LintWarning {
158                            line: item_line,
159                            column: start_col,
160                            end_line: item_line,
161                            end_column: end_col,
162                            message,
163                            severity: Severity::Warning,
164                            rule_name: Some(self.name()),
165                            fix: Some(Fix {
166                                range: fix_range,
167                                replacement: bullet_part,
168                            }),
169                        });
170                    }
171                }
172            }
173        }
174
175        Ok(result)
176    }
177    /// Checks if a line is a bullet list item and returns its indentation level
178    fn is_bullet_list_item(line: &str) -> Option<usize> {
179        if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(line)
180            && let Some(indent) = captures.get(1)
181        {
182            return Some(indent.as_str().len());
183        }
184        None
185    }
186
187    /// Checks if a line is blank (empty or whitespace only)
188    fn is_blank_line(line: &str) -> bool {
189        line.trim().is_empty()
190    }
191
192    /// Find the most relevant previous bullet item for nesting validation
193    fn find_relevant_previous_bullet(lines: &[&str], line_idx: usize) -> Option<(usize, usize)> {
194        let current_indent = Self::is_bullet_list_item(lines[line_idx])?;
195
196        let mut i = line_idx;
197
198        while i > 0 {
199            i -= 1;
200            if Self::is_blank_line(lines[i]) {
201                continue;
202            }
203            if let Some(prev_indent) = Self::is_bullet_list_item(lines[i]) {
204                if prev_indent <= current_indent {
205                    // Found a potential parent or sibling
206                    // Check if there's any non-list content between this potential parent and current item
207                    let mut has_breaking_content = false;
208                    for check_line in &lines[(i + 1)..line_idx] {
209                        if Self::is_blank_line(check_line) {
210                            continue;
211                        }
212                        if Self::is_bullet_list_item(check_line).is_none() {
213                            // Found non-list content - check if it breaks the list structure
214                            let content_indent = check_line.len() - check_line.trim_start().len();
215
216                            // Content is acceptable if:
217                            // 1. It's indented at least as much as the current item (continuation of parent)
218                            // 2. OR it's indented more than the previous bullet (continuation of previous item)
219                            // 3. AND we have a true parent relationship (prev_indent < current_indent)
220                            let is_continuation = content_indent >= prev_indent.max(2); // At least 2 spaces for continuation
221                            let is_valid_nesting = prev_indent < current_indent;
222
223                            if !is_continuation || !is_valid_nesting {
224                                has_breaking_content = true;
225                                break;
226                            }
227                        }
228                    }
229
230                    if !has_breaking_content {
231                        return Some((i, prev_indent));
232                    } else {
233                        // Content breaks the list structure, but continue searching for an earlier valid parent
234                        continue;
235                    }
236                }
237                // If prev_indent > current_indent, it's a child of a sibling, ignore it and keep searching.
238            } else {
239                // Found non-list content - check if it's a continuation line
240                let content_indent = lines[i].len() - lines[i].trim_start().len();
241                // If it's indented enough to be a continuation, don't break the search
242                if content_indent >= 2 {
243                    continue;
244                }
245                // Otherwise, this breaks the search
246                return None;
247            }
248        }
249        None
250    }
251}
252
253impl Rule for MD006StartBullets {
    /// Returns the identifier of this rule, `"MD006"`.
    ///
    /// The same value is attached to every warning as `rule_name`.
    fn name(&self) -> &'static str {
        "MD006"
    }
257
    /// Returns the one-line, human-readable summary of what this rule recommends.
    fn description(&self) -> &'static str {
        "Consider starting bulleted lists at the beginning of the line"
    }
261
262    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
263        let content = ctx.content;
264
265        // Early returns for performance
266        if content.is_empty() || ctx.list_blocks.is_empty() {
267            return Ok(Vec::new());
268        }
269
270        // Quick check for any list markers before processing
271        if !content.contains('*') && !content.contains('-') && !content.contains('+') {
272            return Ok(Vec::new());
273        }
274
275        // Use centralized list blocks for better performance and consistency
276        self.check_optimized(ctx)
277    }
278
279    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
280        let content = ctx.content;
281        let _line_index = LineIndex::new(content.to_string());
282
283        let warnings = self.check(ctx)?;
284        if warnings.is_empty() {
285            return Ok(content.to_string());
286        }
287
288        let lines: Vec<&str> = content.lines().collect();
289
290        let mut fixed_lines: Vec<String> = Vec::with_capacity(lines.len());
291
292        // Create a map of line numbers to replacements
293
294        let mut line_replacements = std::collections::HashMap::new();
295        for warning in warnings {
296            if let Some(fix) = warning.fix {
297                // Line number is 1-based in warnings but we need 0-based for indexing
298                let line_idx = warning.line - 1;
299                line_replacements.insert(line_idx, fix.replacement);
300            }
301        }
302
303        // Apply replacements line by line
304
305        let mut i = 0;
306        while i < lines.len() {
307            if let Some(_replacement) = line_replacements.get(&i) {
308                let prev_line_is_blank = i > 0 && Self::is_blank_line(lines[i - 1]);
309                let prev_line_is_list = i > 0 && Self::is_bullet_list_item(lines[i - 1]).is_some();
310                // Only insert a blank line if previous line is not blank and not a list
311                if !prev_line_is_blank && !prev_line_is_list && i > 0 {
312                    fixed_lines.push(String::new());
313                }
314                // The replacement is the fixed line (unindented list item)
315                // Use the original line, trimmed of leading whitespace
316                let fixed_line = lines[i].trim_start();
317                fixed_lines.push(fixed_line.to_string());
318            } else {
319                fixed_lines.push(lines[i].to_string());
320            }
321            i += 1;
322        }
323
324        // Join the lines with newlines
325
326        let result = fixed_lines.join("\n");
327        if content.ends_with('\n') {
328            Ok(result + "\n")
329        } else {
330            Ok(result)
331        }
332    }
333
334    /// Optimized check using document structure
335    fn check_with_structure(
336        &self,
337        _ctx: &crate::lint_context::LintContext,
338        doc_structure: &DocumentStructure,
339    ) -> LintResult {
340        let content = _ctx.content;
341        if doc_structure.list_lines.is_empty() {
342            return Ok(Vec::new());
343        }
344        if !content.contains('*') && !content.contains('-') && !content.contains('+') {
345            return Ok(Vec::new());
346        }
347        let line_index = LineIndex::new(content.to_string());
348        let mut result = Vec::new();
349        let lines: Vec<&str> = content.lines().collect();
350        let mut valid_bullet_lines = vec![false; lines.len()];
351        for &line_num in &doc_structure.list_lines {
352            let line_idx = line_num - 1;
353            if line_idx >= lines.len() {
354                continue;
355            }
356            let line = lines[line_idx];
357            if doc_structure.is_in_code_block(line_num) {
358                continue;
359            }
360            if let Some(indent) = Self::is_bullet_list_item(line) {
361                let mut is_valid = false; // Assume invalid initially
362                if indent == 0 {
363                    is_valid = true;
364                } else {
365                    match Self::find_relevant_previous_bullet(&lines, line_idx) {
366                        Some((prev_idx, prev_indent)) => {
367                            match prev_indent.cmp(&indent) {
368                                std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {
369                                    // Valid nesting or sibling if previous item was valid
370                                    is_valid = valid_bullet_lines[prev_idx];
371                                }
372                                std::cmp::Ordering::Greater => {
373                                    // remains invalid
374                                }
375                            }
376                        }
377                        None => {
378                            // Indented item with no previous bullet remains invalid
379                        }
380                    }
381                }
382                valid_bullet_lines[line_idx] = is_valid;
383
384                if !is_valid {
385                    // Calculate the precise range for the indentation that needs to be removed
386                    // For "  * Indented bullet", we want to highlight the indentation, marker, and space after marker "  * " (columns 1-4)
387                    let start_col = 1; // Start from beginning of line
388                    let end_col = indent + 3; // Include marker and space after it (indent + 1 for marker + 1 for space + 1 for inclusive range)
389
390                    // For the fix, we need to replace the highlighted part ("  *") with just the bullet marker ("* ")
391                    let line = lines[line_idx];
392                    let trimmed = line.trim_start();
393                    // Extract just the bullet marker and normalize to single space
394                    let bullet_part = if let Some(captures) = UNORDERED_LIST_MARKER_REGEX.captures(trimmed) {
395                        format!("{} ", captures.get(2).unwrap().as_str()) // Always use single space
396                    } else {
397                        "* ".to_string() // fallback
398                    };
399                    let replacement = bullet_part;
400
401                    result.push(LintWarning {
402                        rule_name: Some(self.name()),
403                        severity: Severity::Warning,
404                        line: line_num,
405                        column: start_col,
406                        end_line: line_num,
407                        end_column: end_col,
408                        message: "List item indentation".to_string(),
409                        fix: Some(Fix {
410                            range: {
411                                let start_byte = line_index.line_col_to_byte_range(line_num, start_col).start;
412                                let end_byte = line_index.line_col_to_byte_range(line_num, end_col).start;
413                                start_byte..end_byte
414                            },
415                            replacement,
416                        }),
417                    });
418                }
419            }
420        }
421        Ok(result)
422    }
423
    /// Get the category of this rule for selective processing
    ///
    /// MD006 is classified as a list rule (`RuleCategory::List`).
    fn category(&self) -> RuleCategory {
        RuleCategory::List
    }
428
429    /// Check if this rule should be skipped
430    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
431        let content = ctx.content;
432        content.is_empty() || (!content.contains('*') && !content.contains('-') && !content.contains('+'))
433    }
434
    /// Returns this rule as a `&dyn Any`.
    ///
    /// NOTE(review): presumably used by callers to downcast to the concrete rule type —
    /// confirm against the `Rule` trait documentation.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
438
    /// Always returns `None`: this rule does not hand out a `MaybeDocumentStructure` view.
    ///
    /// NOTE(review): the type still implements `DocumentStructureExtensions` and
    /// `check_with_structure`; whether returning `None` here is intentional should be
    /// confirmed against how the rule runner uses this hook.
    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
        None
    }
442
443    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
444    where
445        Self: Sized,
446    {
447        Box::new(MD006StartBullets)
448    }
449
    /// Always returns `None`: this rule contributes no default configuration section.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        None
    }
453}
454
455impl DocumentStructureExtensions for MD006StartBullets {
456    fn has_relevant_elements(
457        &self,
458        ctx: &crate::lint_context::LintContext,
459        _doc_structure: &DocumentStructure,
460    ) -> bool {
461        // This rule is only relevant if there are unordered list items
462        ctx.list_blocks.iter().any(|block| !block.is_ordered)
463    }
464}
465
// Unit tests for MD006. The string fixtures rely on exact leading whitespace,
// so their indentation must not be reformatted.
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `check_with_structure` on valid nesting, an indented top-level list,
    /// and a simple parent/child pair.
    #[test]
    fn test_with_document_structure() {
        let rule = MD006StartBullets;

        // Properly formatted list: top-level bullets at column 0, children indented by 2
        let content_valid = "* Item 1\n* Item 2\n  * Nested item\n  * Another nested item";
        let structure_valid = DocumentStructure::new(content_valid);
        let ctx_valid = crate::lint_context::LintContext::new(content_valid, crate::config::MarkdownFlavor::Standard);
        let result_valid = rule.check_with_structure(&ctx_valid, &structure_valid).unwrap();
        assert!(
            result_valid.is_empty(),
            "Properly formatted lists should not generate warnings, found: {result_valid:?}"
        );

        // Whole list shifted right by 2 spaces: the first bullet has no valid parent,
        // and the items after it inherit that invalid state
        let content_invalid = "  * Item 1\n  * Item 2\n    * Nested item";
        let structure = DocumentStructure::new(content_invalid);
        let ctx_invalid =
            crate::lint_context::LintContext::new(content_invalid, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx_invalid, &structure).unwrap();

        // Every one of the three bullets must be reported
        assert!(!result.is_empty(), "Improperly indented lists should generate warnings");
        assert_eq!(
            result.len(),
            3,
            "Should generate warnings for all improperly indented items (2 top-level + 1 nested)"
        );

        // Test with mixed indentation - standard nesting is VALID
        let content = "* Item 1\n  * Item 2 (standard nesting is valid)";
        let structure = DocumentStructure::new(content);
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx, &structure).unwrap();
        // Assert that standard nesting does NOT generate warnings
        assert!(
            result.is_empty(),
            "Standard nesting (* Item ->   * Item) should NOT generate warnings, found: {result:?}"
        );
    }

    /// Bullets indented by 3 spaces under ordered items are accepted by `check`.
    #[test]
    fn test_bullets_nested_under_numbered_items() {
        let rule = MD006StartBullets;
        let content = "\
1. **Active Directory/LDAP**
   - User authentication and directory services
   - LDAP for user information and validation

2. **Oracle Unified Directory (OUD)**
   - Extended user directory services";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Should have no warnings - 3 spaces is valid for bullets under numbered items
        assert!(
            result.is_empty(),
            "Expected no warnings for bullets with 3 spaces under numbered items, got: {result:?}"
        );
    }

    /// Bullets indented by fewer than 3 spaces under an ordered item are reported by `check`.
    #[test]
    fn test_bullets_nested_under_numbered_items_wrong_indent() {
        let rule = MD006StartBullets;
        let content = "\
1. **Active Directory/LDAP**
  - Wrong: only 2 spaces
 - Wrong: only 1 space";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Should flag the incorrect indentations (less than 3 spaces)
        assert_eq!(
            result.len(),
            2,
            "Expected warnings for bullets with insufficient spacing under numbered items"
        );
        // One warning per offending line (lines 2 and 3 of the fixture)
        assert!(result.iter().any(|w| w.line == 2));
        assert!(result.iter().any(|w| w.line == 3));
    }

    /// Plain bullet-under-bullet nesting (2 and 4 spaces) produces no warnings from `check`.
    #[test]
    fn test_regular_bullet_nesting_still_works() {
        let rule = MD006StartBullets;
        let content = "\
* Top level
  * Nested bullet (2 spaces is correct)
    * Deeply nested (4 spaces)";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Should have no warnings - standard bullet nesting still works
        assert!(
            result.is_empty(),
            "Expected no warnings for standard bullet nesting, got: {result:?}"
        );
    }
}