rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
10use crate::utils::range_utils::calculate_match_range;
11use toml;
12
13mod md030_config;
14use md030_config::MD030Config;
15
/// Rule MD030: checks the amount of whitespace between a list marker and the
/// content of the list item.
///
/// The expected widths live in [`MD030Config`]: one value for each combination
/// of list kind (unordered / ordered) and item shape (single-line / multi-line).
#[derive(Clone, Default)]
pub struct MD030ListMarkerSpace {
    /// Expected number of spaces after the marker, per list kind and item shape.
    config: MD030Config,
}
20
21impl MD030ListMarkerSpace {
22    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
23        Self {
24            config: MD030Config {
25                ul_single,
26                ul_multi,
27                ol_single,
28                ol_multi,
29            },
30        }
31    }
32
33    pub fn from_config_struct(config: MD030Config) -> Self {
34        Self { config }
35    }
36
37    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
38        match (list_type, is_multi) {
39            (ListType::Unordered, false) => self.config.ul_single,
40            (ListType::Unordered, true) => self.config.ul_multi,
41            (ListType::Ordered, false) => self.config.ol_single,
42            (ListType::Ordered, true) => self.config.ol_multi,
43        }
44    }
45}
46
47impl Rule for MD030ListMarkerSpace {
48    fn name(&self) -> &'static str {
49        "MD030"
50    }
51
52    fn description(&self) -> &'static str {
53        "Spaces after list markers should be consistent"
54    }
55
56    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
57        let mut warnings = Vec::new();
58
59        // Early return if no list content
60        if self.should_skip(ctx) {
61            return Ok(warnings);
62        }
63
64        // Pre-filter lines that are actually list items
65        let mut list_item_lines = Vec::new();
66        for (line_num, line_info) in ctx.lines.iter().enumerate() {
67            if line_info.list_item.is_some() && !line_info.in_code_block {
68                list_item_lines.push(line_num + 1);
69            }
70        }
71
72        let lines: Vec<&str> = ctx.content.lines().collect();
73        let mut in_blockquote = false;
74
75        for line_num in list_item_lines {
76            let line = lines[line_num - 1];
77
78            // Skip indented code blocks (4+ spaces or tab)
79            if line.starts_with("    ") || line.starts_with("\t") {
80                continue;
81            }
82
83            // Track blockquotes (for now, just skip lines starting with >)
84            let mut l = line;
85            while l.trim_start().starts_with('>') {
86                l = l.trim_start().trim_start_matches('>').trim_start();
87                in_blockquote = true;
88            }
89            if in_blockquote {
90                in_blockquote = false;
91                continue;
92            }
93
94            // Use pre-computed list item information
95            if let Some(line_info) = ctx.line_info(line_num)
96                && let Some(list_info) = &line_info.list_item
97            {
98                let list_type = if list_info.is_ordered {
99                    ListType::Ordered
100                } else {
101                    ListType::Unordered
102                };
103
104                // Calculate actual spacing after marker
105                let marker_end = list_info.marker_column + list_info.marker.len();
106                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
107
108                // Determine if this is a multi-line list item
109                let is_multi_line = self.is_multi_line_list_item(ctx, line_num);
110                let expected_spaces = self.get_expected_spaces(list_type, is_multi_line);
111
112                // Check for tabs in the spacing
113                let line_content = &line[list_info.marker_column..];
114                let spacing_content = if line_content.len() > list_info.marker.len() {
115                    let after_marker_start = list_info.marker.len();
116                    let after_marker_end = after_marker_start + actual_spaces;
117                    &line_content[after_marker_start..after_marker_end.min(line_content.len())]
118                } else {
119                    ""
120                };
121                let has_tabs = spacing_content.contains('\t');
122
123                // Check if spacing is incorrect or contains tabs
124                if actual_spaces != expected_spaces || has_tabs {
125                    // Calculate precise character range for the problematic spacing
126                    let whitespace_start_pos = marker_end;
127                    let whitespace_len = actual_spaces;
128
129                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
130                    let (start_line, start_col, end_line, end_col) =
131                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
132
133                    // Generate the correct replacement text (just the correct spacing)
134                    let correct_spaces = " ".repeat(expected_spaces);
135
136                    // Calculate byte positions for the fix range
137                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
138                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
139                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
140
141                    let fix = Some(crate::rule::Fix {
142                        range: whitespace_start_byte..whitespace_end_byte,
143                        replacement: correct_spaces,
144                    });
145
146                    // Generate appropriate message
147                    let message =
148                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
149
150                    warnings.push(LintWarning {
151                        rule_name: Some(self.name()),
152                        severity: Severity::Warning,
153                        line: start_line,
154                        column: start_col,
155                        end_line,
156                        end_column: end_col,
157                        message,
158                        fix,
159                    });
160                }
161            }
162        }
163        Ok(warnings)
164    }
165
166    fn category(&self) -> RuleCategory {
167        RuleCategory::List
168    }
169
170    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
171        if ctx.content.is_empty() {
172            return true;
173        }
174
175        // Fast byte-level check for list markers
176        let bytes = ctx.content.as_bytes();
177        !bytes.contains(&b'*')
178            && !bytes.contains(&b'-')
179            && !bytes.contains(&b'+')
180            && !bytes.iter().any(|&b| b.is_ascii_digit())
181    }
182
183    fn as_any(&self) -> &dyn std::any::Any {
184        self
185    }
186
187    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
188        Some(self)
189    }
190
191    fn default_config_section(&self) -> Option<(String, toml::Value)> {
192        let default_config = MD030Config::default();
193        let json_value = serde_json::to_value(&default_config).ok()?;
194        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
195
196        if let toml::Value::Table(table) = toml_value {
197            if !table.is_empty() {
198                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
199            } else {
200                None
201            }
202        } else {
203            None
204        }
205    }
206
207    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
208        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
209        Box::new(Self::from_config_struct(rule_config))
210    }
211
212    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
213        let content = ctx.content;
214
215        // Early return if no fixes needed
216        if self.should_skip(ctx) {
217            return Ok(content.to_string());
218        }
219
220        let structure = crate::utils::document_structure::DocumentStructure::new(content);
221        let lines: Vec<&str> = content.lines().collect();
222        let mut result_lines = Vec::with_capacity(lines.len());
223
224        // Pre-compute which lines need potential fixes
225        let mut needs_check = vec![false; lines.len()];
226        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
227            if line_info.list_item.is_some() && !line_info.in_code_block {
228                needs_check[line_idx] = true;
229            }
230        }
231
232        for (line_idx, line) in lines.iter().enumerate() {
233            let line_num = line_idx + 1;
234
235            // Quick check: if this line doesn't need checking, just add it
236            if !needs_check[line_idx] {
237                result_lines.push(line.to_string());
238                continue;
239            }
240
241            // Skip if in front matter
242            if structure.is_in_front_matter(line_num) {
243                result_lines.push(line.to_string());
244                continue;
245            }
246
247            // Skip if this is an indented code block (4+ spaces with blank line before)
248            if self.is_indented_code_block(line, line_idx, &lines) {
249                result_lines.push(line.to_string());
250                continue;
251            }
252
253            // Skip blockquotes for now (conservative approach)
254            if line.trim_start().starts_with('>') {
255                result_lines.push(line.to_string());
256                continue;
257            }
258
259            // Try to fix list marker spacing
260            let is_multi_line = self.is_multi_line_list_item(ctx, line_num);
261            if let Some(fixed_line) = self.try_fix_list_marker_spacing_with_context(line, is_multi_line) {
262                result_lines.push(fixed_line);
263            } else {
264                result_lines.push(line.to_string());
265            }
266        }
267
268        // Preserve trailing newline if original content had one
269        let result = result_lines.join("\n");
270        if content.ends_with('\n') && !result.ends_with('\n') {
271            Ok(result + "\n")
272        } else {
273            Ok(result)
274        }
275    }
276}
277
278impl MD030ListMarkerSpace {
279    /// Check if a list item is multi-line (spans multiple lines or contains nested content)
280    fn is_multi_line_list_item(&self, ctx: &crate::lint_context::LintContext, line_num: usize) -> bool {
281        // Get the current list item info
282        let current_line_info = match ctx.line_info(line_num) {
283            Some(info) if info.list_item.is_some() => info,
284            _ => return false,
285        };
286
287        let current_list = current_line_info.list_item.as_ref().unwrap();
288        let lines: Vec<&str> = ctx.content.lines().collect();
289
290        // Check subsequent lines to see if they are continuation of this list item
291        for next_line_num in (line_num + 1)..=lines.len() {
292            if let Some(next_line_info) = ctx.line_info(next_line_num) {
293                // If we encounter another list item at the same or higher level, this item is done
294                if let Some(next_list) = &next_line_info.list_item {
295                    if next_list.marker_column <= current_list.marker_column {
296                        break; // Found the next list item at same/higher level
297                    }
298                    // If there's a nested list item, this is multi-line
299                    return true;
300                }
301
302                // If we encounter a non-empty line that's not indented enough to be part of this list item,
303                // this list item is done
304                let line_content = lines.get(next_line_num - 1).unwrap_or(&"");
305                if !line_content.trim().is_empty() {
306                    let expected_continuation_indent = current_list.content_column;
307                    let actual_indent = line_content.len() - line_content.trim_start().len();
308
309                    if actual_indent < expected_continuation_indent {
310                        break; // Line is not indented enough to be part of this list item
311                    }
312
313                    // If we find a continuation line, this is multi-line
314                    if actual_indent >= expected_continuation_indent {
315                        return true;
316                    }
317                }
318
319                // Empty lines don't affect the multi-line status by themselves
320            }
321        }
322
323        false
324    }
325
326    /// Fix list marker spacing with context - handles tabs, multiple spaces, and mixed whitespace
327    fn try_fix_list_marker_spacing_with_context(&self, line: &str, is_multi_line: bool) -> Option<String> {
328        let trimmed = line.trim_start();
329        let indent = &line[..line.len() - trimmed.len()];
330
331        // Check for unordered list markers
332        for marker in &["*", "-", "+"] {
333            if let Some(after_marker) = trimmed.strip_prefix(marker) {
334                // Fix if there are tabs, multiple spaces, or mixed whitespace
335                if after_marker.starts_with('\t')
336                    || after_marker.starts_with("  ")
337                    || (after_marker.starts_with(' ') && after_marker.as_bytes().get(1) == Some(&b'\t'))
338                {
339                    let content = after_marker.trim_start();
340                    if !content.is_empty() {
341                        // Use appropriate configuration based on whether it's multi-line
342                        let spaces = if is_multi_line {
343                            " ".repeat(self.config.ul_multi)
344                        } else {
345                            " ".repeat(self.config.ul_single)
346                        };
347                        return Some(format!("{indent}{marker}{spaces}{content}"));
348                    }
349                }
350                break; // Found a marker, don't check others
351            }
352        }
353
354        // Check for ordered list markers
355        if let Some(dot_pos) = trimmed.find('.') {
356            let before_dot = &trimmed[..dot_pos];
357            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
358                let after_dot = &trimmed[dot_pos + 1..];
359                // Fix if there are tabs, multiple spaces, or mixed whitespace
360                if after_dot.starts_with('\t')
361                    || after_dot.starts_with("  ")
362                    || (after_dot.starts_with(' ') && after_dot.as_bytes().get(1) == Some(&b'\t'))
363                {
364                    let content = after_dot.trim_start();
365                    if !content.is_empty() {
366                        // Use appropriate configuration based on whether it's multi-line
367                        let spaces = if is_multi_line {
368                            " ".repeat(self.config.ol_multi)
369                        } else {
370                            " ".repeat(self.config.ol_single)
371                        };
372                        return Some(format!("{indent}{before_dot}.{spaces}{content}"));
373                    }
374                }
375            }
376        }
377
378        None
379    }
380
381    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
382    /// (Legacy method for backward compatibility - defaults to single-line behavior)
383    /// Check if a line is part of an indented code block (4+ spaces with blank line before)
384    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
385        // Must start with 4+ spaces or tab
386        if !line.starts_with("    ") && !line.starts_with('\t') {
387            return false;
388        }
389
390        // If it's the first line, it's not an indented code block
391        if line_idx == 0 {
392            return false;
393        }
394
395        // Check if there's a blank line before this line or before the start of the indented block
396        if self.has_blank_line_before_indented_block(line_idx, lines) {
397            return true;
398        }
399
400        false
401    }
402
403    /// Check if there's a blank line before the start of an indented block
404    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
405        // Walk backwards to find the start of the indented block
406        let mut current_idx = line_idx;
407
408        // Find the first line in this indented block
409        while current_idx > 0 {
410            let current_line = lines[current_idx];
411            let prev_line = lines[current_idx - 1];
412
413            // If current line is not indented, we've gone too far
414            if !current_line.starts_with("    ") && !current_line.starts_with('\t') {
415                break;
416            }
417
418            // If previous line is not indented, check if it's blank
419            if !prev_line.starts_with("    ") && !prev_line.starts_with('\t') {
420                return prev_line.trim().is_empty();
421            }
422
423            current_idx -= 1;
424        }
425
426        false
427    }
428}
429
/// Document-structure hook for MD030.
impl DocumentStructureExtensions for MD030ListMarkerSpace {
    /// Returns `true` when `doc_structure.list_lines` is non-empty, i.e. the
    /// pre-computed structure recorded at least one list line. The lint
    /// context itself is not consulted.
    fn has_relevant_elements(
        &self,
        _ctx: &crate::lint_context::LintContext,
        doc_structure: &DocumentStructure,
    ) -> bool {
        !doc_structure.list_lines.is_empty()
    }
}
439
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the default rule through `check_with_structure` on a correctly
    /// spaced document (expects no warnings) and on a document with two
    /// over-spaced markers (expects two warnings with the standard message).
    #[test]
    fn test_with_document_structure() {
        let rule = MD030ListMarkerSpace::default();
        // Every marker here is followed by exactly one space.
        let content = "* Item 1\n* Item 2\n  * Nested item\n1. Ordered item";
        let structure = DocumentStructure::new(content);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx, &structure).unwrap();
        assert!(
            result.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );
        // Lines 1 and 3 have extra spaces after the marker; line 2 is fine.
        let content = "*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)";
        let structure = DocumentStructure::new(content);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx, &structure).unwrap();
        // Expect warnings for lines with too many spaces after the marker
        assert_eq!(
            result.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );
        // Each warning must use the "Expected: N; Actual: M" message format.
        for warning in result {
            assert!(
                warning.message.starts_with("Spaces after list markers (Expected:")
                    && warning.message.contains("Actual:"),
                "Warning message should include expected and actual values, got: '{}'",
                warning.message
            );
        }
    }
}