rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
10use crate::utils::range_utils::calculate_match_range;
11use toml;
12
13mod md030_config;
14use md030_config::MD030Config;
15
/// Lint rule MD030: checks the whitespace between a list marker and the
/// list item's content.
///
/// The widths to enforce are taken from the wrapped [`MD030Config`].
#[derive(Clone, Default)]
pub struct MD030ListMarkerSpace {
    // Configured widths (ul_single / ul_multi / ol_single / ol_multi).
    config: MD030Config,
}
20
21impl MD030ListMarkerSpace {
22    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
23        Self {
24            config: MD030Config {
25                ul_single,
26                ul_multi,
27                ol_single,
28                ol_multi,
29            },
30        }
31    }
32
33    pub fn from_config_struct(config: MD030Config) -> Self {
34        Self { config }
35    }
36
37    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
38        match (list_type, is_multi) {
39            (ListType::Unordered, false) => self.config.ul_single,
40            (ListType::Unordered, true) => self.config.ul_multi,
41            (ListType::Ordered, false) => self.config.ol_single,
42            (ListType::Ordered, true) => self.config.ol_multi,
43        }
44    }
45}
46
47impl Rule for MD030ListMarkerSpace {
48    fn name(&self) -> &'static str {
49        "MD030"
50    }
51
52    fn description(&self) -> &'static str {
53        "Spaces after list markers should be consistent"
54    }
55
56    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
57        let mut warnings = Vec::new();
58        let lines: Vec<String> = ctx.content.lines().map(|l| l.to_string()).collect();
59        let mut in_blockquote = false;
60        for (i, line) in lines.iter().enumerate() {
61            let line_num = i + 1;
62
63            // Skip if in code block
64            if let Some(line_info) = ctx.line_info(line_num)
65                && line_info.in_code_block
66            {
67                continue;
68            }
69            // Skip indented code blocks (4+ spaces or tab)
70            if line.starts_with("    ") || line.starts_with("\t") {
71                continue;
72            }
73            // Track blockquotes (for now, just skip lines starting with >)
74            let mut l = line.as_str();
75            while l.trim_start().starts_with('>') {
76                l = l.trim_start().trim_start_matches('>').trim_start();
77                in_blockquote = true;
78            }
79            if in_blockquote {
80                in_blockquote = false;
81                continue;
82            }
83            // Use pre-computed list item information
84            if let Some(line_info) = ctx.line_info(line_num)
85                && let Some(list_info) = &line_info.list_item
86            {
87                let list_type = if list_info.is_ordered {
88                    ListType::Ordered
89                } else {
90                    ListType::Unordered
91                };
92
93                // Calculate actual spacing after marker
94                let marker_end = list_info.marker_column + list_info.marker.len();
95                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
96
97                let expected_spaces = self.get_expected_spaces(list_type, false);
98
99                // Check for tabs in the spacing
100                let line_content = &line[list_info.marker_column..];
101                let spacing_content = if line_content.len() > list_info.marker.len() {
102                    let after_marker_start = list_info.marker.len();
103                    let after_marker_end = after_marker_start + actual_spaces;
104                    &line_content[after_marker_start..after_marker_end.min(line_content.len())]
105                } else {
106                    ""
107                };
108                let has_tabs = spacing_content.contains('\t');
109
110                // Check if spacing is incorrect or contains tabs
111                if actual_spaces != expected_spaces || has_tabs {
112                    // Calculate precise character range for the problematic spacing
113                    let whitespace_start_pos = marker_end;
114                    let whitespace_len = actual_spaces;
115
116                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
117                    let (start_line, start_col, end_line, end_col) =
118                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
119
120                    // Generate the correct replacement text (just the correct spacing)
121                    let correct_spaces = " ".repeat(expected_spaces);
122
123                    // Calculate byte positions for the fix range
124                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
125                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
126                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
127
128                    let fix = Some(crate::rule::Fix {
129                        range: whitespace_start_byte..whitespace_end_byte,
130                        replacement: correct_spaces,
131                    });
132
133                    // Generate appropriate message
134                    let message =
135                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
136
137                    warnings.push(LintWarning {
138                        rule_name: Some(self.name()),
139                        severity: Severity::Warning,
140                        line: start_line,
141                        column: start_col,
142                        end_line,
143                        end_column: end_col,
144                        message,
145                        fix,
146                    });
147                }
148            }
149        }
150        Ok(warnings)
151    }
152
153    fn category(&self) -> RuleCategory {
154        RuleCategory::List
155    }
156
157    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
158        ctx.content.is_empty()
159            || (!ctx.content.contains('*')
160                && !ctx.content.contains('-')
161                && !ctx.content.contains('+')
162                && !ctx.content.contains(|c: char| c.is_ascii_digit()))
163    }
164
165    fn as_any(&self) -> &dyn std::any::Any {
166        self
167    }
168
169    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
170        Some(self)
171    }
172
173    fn default_config_section(&self) -> Option<(String, toml::Value)> {
174        let default_config = MD030Config::default();
175        let json_value = serde_json::to_value(&default_config).ok()?;
176        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
177
178        if let toml::Value::Table(table) = toml_value {
179            if !table.is_empty() {
180                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
181            } else {
182                None
183            }
184        } else {
185            None
186        }
187    }
188
189    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
190        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
191        Box::new(Self::from_config_struct(rule_config))
192    }
193
194    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
195        let content = ctx.content;
196        let structure = crate::utils::document_structure::DocumentStructure::new(content);
197        let lines: Vec<&str> = content.lines().collect();
198        let mut result_lines = Vec::new();
199
200        for (line_idx, line) in lines.iter().enumerate() {
201            let line_num = line_idx + 1;
202
203            // Skip if in code block
204            if let Some(line_info) = ctx.line_info(line_num)
205                && line_info.in_code_block
206            {
207                result_lines.push(line.to_string());
208                continue;
209            }
210
211            // Skip if in front matter
212            if structure.is_in_front_matter(line_num) {
213                result_lines.push(line.to_string());
214                continue;
215            }
216
217            // Skip if this is an indented code block (4+ spaces with blank line before)
218            if self.is_indented_code_block(line, line_idx, &lines) {
219                result_lines.push(line.to_string());
220                continue;
221            }
222
223            // Skip blockquotes for now (conservative approach)
224            if line.trim_start().starts_with('>') {
225                result_lines.push(line.to_string());
226                continue;
227            }
228
229            // Try to fix list marker spacing
230            if let Some(fixed_line) = self.try_fix_list_marker_spacing(line) {
231                result_lines.push(fixed_line);
232            } else {
233                result_lines.push(line.to_string());
234            }
235        }
236
237        // Preserve trailing newline if original content had one
238        let result = result_lines.join("\n");
239        if content.ends_with('\n') && !result.ends_with('\n') {
240            Ok(result + "\n")
241        } else {
242            Ok(result)
243        }
244    }
245}
246
247impl MD030ListMarkerSpace {
248    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
249    fn try_fix_list_marker_spacing(&self, line: &str) -> Option<String> {
250        let trimmed = line.trim_start();
251        let indent = &line[..line.len() - trimmed.len()];
252
253        // Check for unordered list markers
254        for marker in &["*", "-", "+"] {
255            if let Some(after_marker) = trimmed.strip_prefix(marker) {
256                // Fix if there are tabs, multiple spaces, or mixed whitespace
257                if after_marker.starts_with('\t')
258                    || after_marker.starts_with("  ")
259                    || (after_marker.starts_with(' ') && after_marker.chars().nth(1) == Some('\t'))
260                {
261                    let content = after_marker.trim_start();
262                    if !content.is_empty() {
263                        // Use the configured number of spaces for unordered lists
264                        let spaces = " ".repeat(self.config.ul_single);
265                        return Some(format!("{indent}{marker}{spaces}{content}"));
266                    }
267                }
268                break; // Found a marker, don't check others
269            }
270        }
271
272        // Check for ordered list markers
273        if let Some(dot_pos) = trimmed.find('.') {
274            let before_dot = &trimmed[..dot_pos];
275            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
276                let after_dot = &trimmed[dot_pos + 1..];
277                // Fix if there are tabs, multiple spaces, or mixed whitespace
278                if after_dot.starts_with('\t')
279                    || after_dot.starts_with("  ")
280                    || (after_dot.starts_with(' ') && after_dot.chars().nth(1) == Some('\t'))
281                {
282                    let content = after_dot.trim_start();
283                    if !content.is_empty() {
284                        // Use the configured number of spaces for ordered lists
285                        let spaces = " ".repeat(self.config.ol_single);
286                        return Some(format!("{indent}{before_dot}.{spaces}{content}"));
287                    }
288                }
289            }
290        }
291
292        None
293    }
294
295    /// Check if a line is part of an indented code block (4+ spaces with blank line before)
296    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
297        // Must start with 4+ spaces or tab
298        if !line.starts_with("    ") && !line.starts_with('\t') {
299            return false;
300        }
301
302        // If it's the first line, it's not an indented code block
303        if line_idx == 0 {
304            return false;
305        }
306
307        // Check if there's a blank line before this line or before the start of the indented block
308        if self.has_blank_line_before_indented_block(line_idx, lines) {
309            return true;
310        }
311
312        false
313    }
314
315    /// Check if there's a blank line before the start of an indented block
316    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
317        // Walk backwards to find the start of the indented block
318        let mut current_idx = line_idx;
319
320        // Find the first line in this indented block
321        while current_idx > 0 {
322            let current_line = lines[current_idx];
323            let prev_line = lines[current_idx - 1];
324
325            // If current line is not indented, we've gone too far
326            if !current_line.starts_with("    ") && !current_line.starts_with('\t') {
327                break;
328            }
329
330            // If previous line is not indented, check if it's blank
331            if !prev_line.starts_with("    ") && !prev_line.starts_with('\t') {
332                return prev_line.trim().is_empty();
333            }
334
335            current_idx -= 1;
336        }
337
338        false
339    }
340}
341
342impl DocumentStructureExtensions for MD030ListMarkerSpace {
343    fn has_relevant_elements(
344        &self,
345        _ctx: &crate::lint_context::LintContext,
346        doc_structure: &DocumentStructure,
347    ) -> bool {
348        !doc_structure.list_lines.is_empty()
349    }
350}
351
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the rule through `check_with_structure` on a well-spaced and on a
    /// badly spaced document.
    #[test]
    fn test_with_document_structure() {
        let rule = MD030ListMarkerSpace::default();

        // Builds the structure and lint context for `content` and returns the warnings.
        let lint = |content: &str| {
            let structure = DocumentStructure::new(content);
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
            rule.check_with_structure(&ctx, &structure).unwrap()
        };

        let well_spaced = lint("* Item 1\n* Item 2\n  * Nested item\n1. Ordered item");
        assert!(
            well_spaced.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );

        // Expect warnings for lines with too many spaces after the marker
        let over_spaced = lint("*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)");
        assert_eq!(
            over_spaced.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );
        for warning in over_spaced {
            let has_prefix = warning.message.starts_with("Spaces after list markers (Expected:");
            let has_actual = warning.message.contains("Actual:");
            assert!(
                has_prefix && has_actual,
                "Warning message should include expected and actual values, got: '{}'",
                warning.message
            );
        }
    }
}
387}