rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::range_utils::calculate_match_range;
10use toml;
11
12mod md030_config;
13use md030_config::MD030Config;
14
15#[derive(Clone, Default)]
16pub struct MD030ListMarkerSpace {
17    config: MD030Config,
18}
19
20impl MD030ListMarkerSpace {
21    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
22        Self {
23            config: MD030Config {
24                ul_single: crate::types::PositiveUsize::new(ul_single)
25                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
26                ul_multi: crate::types::PositiveUsize::new(ul_multi)
27                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
28                ol_single: crate::types::PositiveUsize::new(ol_single)
29                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
30                ol_multi: crate::types::PositiveUsize::new(ol_multi)
31                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
32            },
33        }
34    }
35
36    pub fn from_config_struct(config: MD030Config) -> Self {
37        Self { config }
38    }
39
40    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
41        match (list_type, is_multi) {
42            (ListType::Unordered, false) => self.config.ul_single.get(),
43            (ListType::Unordered, true) => self.config.ul_multi.get(),
44            (ListType::Ordered, false) => self.config.ol_single.get(),
45            (ListType::Ordered, true) => self.config.ol_multi.get(),
46        }
47    }
48}
49
50impl Rule for MD030ListMarkerSpace {
51    fn name(&self) -> &'static str {
52        "MD030"
53    }
54
55    fn description(&self) -> &'static str {
56        "Spaces after list markers should be consistent"
57    }
58
59    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
60        let mut warnings = Vec::new();
61
62        // Early return if no list content
63        if self.should_skip(ctx) {
64            return Ok(warnings);
65        }
66
67        // Pre-filter lines that are actually list items
68        let mut list_item_lines = Vec::new();
69        for (line_num, line_info) in ctx.lines.iter().enumerate() {
70            if line_info.list_item.is_some() && !line_info.in_code_block {
71                list_item_lines.push(line_num + 1);
72            }
73        }
74
75        // Collect lines once instead of in every is_multi_line_list_item call
76        let lines: Vec<&str> = ctx.content.lines().collect();
77
78        for line_num in list_item_lines {
79            let line = lines[line_num - 1];
80
81            // Skip indented code blocks (4+ spaces or tab)
82            if line.starts_with("    ") || line.starts_with("\t") {
83                continue;
84            }
85
86            // Use pre-computed list item information
87            // LintContext already handles blockquotes by stripping prefixes and
88            // storing correct marker_column/content_column values
89            if let Some(line_info) = ctx.line_info(line_num)
90                && let Some(list_info) = &line_info.list_item
91            {
92                let list_type = if list_info.is_ordered {
93                    ListType::Ordered
94                } else {
95                    ListType::Unordered
96                };
97
98                // Calculate actual spacing after marker
99                let marker_end = list_info.marker_column + list_info.marker.len();
100                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
101
102                // Determine if this is a multi-line list item
103                let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
104                let expected_spaces = self.get_expected_spaces(list_type, is_multi_line);
105
106                // MD030 only checks for incorrect number of spaces, not tabs
107                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
108                // Check if spacing is incorrect
109                if actual_spaces != expected_spaces {
110                    // Calculate precise character range for the problematic spacing
111                    let whitespace_start_pos = marker_end;
112                    let whitespace_len = actual_spaces;
113
114                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
115                    let (start_line, start_col, end_line, end_col) =
116                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
117
118                    // Generate the correct replacement text (just the correct spacing)
119                    let correct_spaces = " ".repeat(expected_spaces);
120
121                    // Calculate byte positions for the fix range
122                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
123                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
124                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
125
126                    let fix = Some(crate::rule::Fix {
127                        range: whitespace_start_byte..whitespace_end_byte,
128                        replacement: correct_spaces,
129                    });
130
131                    // Generate appropriate message
132                    let message =
133                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
134
135                    warnings.push(LintWarning {
136                        rule_name: Some(self.name().to_string()),
137                        severity: Severity::Warning,
138                        line: start_line,
139                        column: start_col,
140                        end_line,
141                        end_column: end_col,
142                        message,
143                        fix,
144                    });
145                }
146            }
147        }
148        Ok(warnings)
149    }
150
151    fn category(&self) -> RuleCategory {
152        RuleCategory::List
153    }
154
155    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
156        if ctx.content.is_empty() {
157            return true;
158        }
159
160        // Fast byte-level check for list markers (including ordered lists)
161        let bytes = ctx.content.as_bytes();
162        !bytes.contains(&b'*')
163            && !bytes.contains(&b'-')
164            && !bytes.contains(&b'+')
165            && !bytes.iter().any(|&b| b.is_ascii_digit())
166    }
167
168    fn as_any(&self) -> &dyn std::any::Any {
169        self
170    }
171
172    fn default_config_section(&self) -> Option<(String, toml::Value)> {
173        let default_config = MD030Config::default();
174        let json_value = serde_json::to_value(&default_config).ok()?;
175        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
176
177        if let toml::Value::Table(table) = toml_value {
178            if !table.is_empty() {
179                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
180            } else {
181                None
182            }
183        } else {
184            None
185        }
186    }
187
188    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
189        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
190        Box::new(Self::from_config_struct(rule_config))
191    }
192
193    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
194        let content = ctx.content;
195
196        // Early return if no fixes needed
197        if self.should_skip(ctx) {
198            return Ok(content.to_string());
199        }
200
201        // DocumentStructure is no longer used for optimization
202        let lines: Vec<&str> = content.lines().collect();
203        let mut result_lines = Vec::with_capacity(lines.len());
204
205        // Pre-compute which lines need potential fixes
206        let mut needs_check = vec![false; lines.len()];
207        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
208            if line_info.list_item.is_some() && !line_info.in_code_block {
209                needs_check[line_idx] = true;
210            }
211        }
212
213        for (line_idx, line) in lines.iter().enumerate() {
214            let line_num = line_idx + 1;
215
216            // Quick check: if this line doesn't need checking, just add it
217            if !needs_check[line_idx] {
218                result_lines.push(line.to_string());
219                continue;
220            }
221
222            // Skip if in front matter
223            // Note: Front matter checking is handled by LintContext directly
224            // No additional front matter check needed here
225
226            // Skip if this is an indented code block (4+ spaces with blank line before)
227            if self.is_indented_code_block(line, line_idx, &lines) {
228                result_lines.push(line.to_string());
229                continue;
230            }
231
232            // Try to fix list marker spacing (handles blockquotes internally)
233            let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
234            if let Some(fixed_line) = self.try_fix_list_marker_spacing_with_context(line, is_multi_line) {
235                result_lines.push(fixed_line);
236            } else {
237                result_lines.push(line.to_string());
238            }
239        }
240
241        // Preserve trailing newline if original content had one
242        let result = result_lines.join("\n");
243        if content.ends_with('\n') && !result.ends_with('\n') {
244            Ok(result + "\n")
245        } else {
246            Ok(result)
247        }
248    }
249}
250
251impl MD030ListMarkerSpace {
252    /// Check if a list item is multi-line (spans multiple lines or contains nested content)
253    fn is_multi_line_list_item(&self, ctx: &crate::lint_context::LintContext, line_num: usize, lines: &[&str]) -> bool {
254        // Get the current list item info
255        let current_line_info = match ctx.line_info(line_num) {
256            Some(info) if info.list_item.is_some() => info,
257            _ => return false,
258        };
259
260        let current_list = current_line_info.list_item.as_ref().unwrap();
261
262        // Check subsequent lines to see if they are continuation of this list item
263        for next_line_num in (line_num + 1)..=lines.len() {
264            if let Some(next_line_info) = ctx.line_info(next_line_num) {
265                // If we encounter another list item at the same or higher level, this item is done
266                if let Some(next_list) = &next_line_info.list_item {
267                    if next_list.marker_column <= current_list.marker_column {
268                        break; // Found the next list item at same/higher level
269                    }
270                    // If there's a nested list item, this is multi-line
271                    return true;
272                }
273
274                // If we encounter a non-empty line that's not indented enough to be part of this list item,
275                // this list item is done
276                let line_content = lines.get(next_line_num - 1).unwrap_or(&"");
277                if !line_content.trim().is_empty() {
278                    let expected_continuation_indent = current_list.content_column;
279                    let actual_indent = line_content.len() - line_content.trim_start().len();
280
281                    if actual_indent < expected_continuation_indent {
282                        break; // Line is not indented enough to be part of this list item
283                    }
284
285                    // If we find a continuation line, this is multi-line
286                    if actual_indent >= expected_continuation_indent {
287                        return true;
288                    }
289                }
290
291                // Empty lines don't affect the multi-line status by themselves
292            }
293        }
294
295        false
296    }
297
298    /// Helper to fix marker spacing for both ordered and unordered lists
299    fn fix_marker_spacing(
300        &self,
301        marker: &str,
302        after_marker: &str,
303        indent: &str,
304        is_multi_line: bool,
305        is_ordered: bool,
306    ) -> Option<String> {
307        // MD030 only fixes multiple spaces, not tabs
308        // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
309        // Skip if the spacing starts with a tab
310        if after_marker.starts_with('\t') {
311            return None;
312        }
313
314        // Fix if there are multiple spaces
315        if after_marker.starts_with("  ") {
316            let content = after_marker.trim_start_matches(' ');
317            if !content.is_empty() {
318                // Use appropriate configuration based on list type and whether it's multi-line
319                let spaces = if is_ordered {
320                    if is_multi_line {
321                        " ".repeat(self.config.ol_multi.get())
322                    } else {
323                        " ".repeat(self.config.ol_single.get())
324                    }
325                } else if is_multi_line {
326                    " ".repeat(self.config.ul_multi.get())
327                } else {
328                    " ".repeat(self.config.ul_single.get())
329                };
330                return Some(format!("{indent}{marker}{spaces}{content}"));
331            }
332        }
333        None
334    }
335
336    /// Fix list marker spacing with context - handles tabs, multiple spaces, and mixed whitespace
337    fn try_fix_list_marker_spacing_with_context(&self, line: &str, is_multi_line: bool) -> Option<String> {
338        // Extract blockquote prefix if present
339        let (blockquote_prefix, content) = Self::strip_blockquote_prefix(line);
340
341        let trimmed = content.trim_start();
342        let indent = &content[..content.len() - trimmed.len()];
343
344        // Check for unordered list markers
345        for marker in &["*", "-", "+"] {
346            if let Some(after_marker) = trimmed.strip_prefix(marker) {
347                if let Some(fixed) = self.fix_marker_spacing(marker, after_marker, indent, is_multi_line, false) {
348                    return Some(format!("{blockquote_prefix}{fixed}"));
349                }
350                break; // Found a marker, don't check others
351            }
352        }
353
354        // Check for ordered list markers
355        if let Some(dot_pos) = trimmed.find('.') {
356            let before_dot = &trimmed[..dot_pos];
357            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
358                let after_dot = &trimmed[dot_pos + 1..];
359                let marker = format!("{before_dot}.");
360                if let Some(fixed) = self.fix_marker_spacing(&marker, after_dot, indent, is_multi_line, true) {
361                    return Some(format!("{blockquote_prefix}{fixed}"));
362                }
363            }
364        }
365
366        None
367    }
368
369    /// Strip blockquote prefix from a line, returning (prefix, content)
370    fn strip_blockquote_prefix(line: &str) -> (String, &str) {
371        let mut prefix = String::new();
372        let mut remaining = line;
373
374        loop {
375            let trimmed = remaining.trim_start();
376            if !trimmed.starts_with('>') {
377                break;
378            }
379            // Add leading spaces to prefix
380            let leading_spaces = remaining.len() - trimmed.len();
381            prefix.push_str(&remaining[..leading_spaces]);
382            prefix.push('>');
383            remaining = &trimmed[1..];
384
385            // Handle optional space after >
386            if remaining.starts_with(' ') {
387                prefix.push(' ');
388                remaining = &remaining[1..];
389            }
390        }
391
392        (prefix, remaining)
393    }
394
395    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
396    /// (Legacy method for backward compatibility - defaults to single-line behavior)
397    /// Check if a line is part of an indented code block (4+ spaces with blank line before)
398    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
399        // Must start with 4+ spaces or tab
400        if !line.starts_with("    ") && !line.starts_with('\t') {
401            return false;
402        }
403
404        // If it's the first line, it's not an indented code block
405        if line_idx == 0 {
406            return false;
407        }
408
409        // Check if there's a blank line before this line or before the start of the indented block
410        if self.has_blank_line_before_indented_block(line_idx, lines) {
411            return true;
412        }
413
414        false
415    }
416
417    /// Check if there's a blank line before the start of an indented block
418    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
419        // Walk backwards to find the start of the indented block
420        let mut current_idx = line_idx;
421
422        // Find the first line in this indented block
423        while current_idx > 0 {
424            let current_line = lines[current_idx];
425            let prev_line = lines[current_idx - 1];
426
427            // If current line is not indented, we've gone too far
428            if !current_line.starts_with("    ") && !current_line.starts_with('\t') {
429                break;
430            }
431
432            // If previous line is not indented, check if it's blank
433            if !prev_line.starts_with("    ") && !prev_line.starts_with('\t') {
434                return prev_line.trim().is_empty();
435            }
436
437            current_idx -= 1;
438        }
439
440        false
441    }
442}
443
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Default configuration: well-spaced lists pass, over-spaced markers are
    /// flagged with a message naming expected and actual widths.
    #[test]
    fn test_basic_functionality() {
        let rule = MD030ListMarkerSpace::default();
        // Runs the rule over `content` using the standard markdown flavor.
        let lint = |content: &str| {
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
            rule.check(&ctx).unwrap()
        };

        let clean = lint("* Item 1\n* Item 2\n  * Nested item\n1. Ordered item");
        assert!(
            clean.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );

        // Expect warnings for lines with too many spaces after the marker
        let flagged = lint("*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)");
        assert_eq!(
            flagged.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );

        for warning in &flagged {
            let has_expected_prefix = warning.message.starts_with("Spaces after list markers (Expected:");
            let mentions_actual = warning.message.contains("Actual:");
            assert!(
                has_expected_prefix && mentions_actual,
                "Warning message should include expected and actual values, got: '{}'",
                warning.message
            );
        }
    }
}
477}