rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::range_utils::calculate_match_range;
10use toml;
11
12mod md030_config;
13use md030_config::MD030Config;
14
15#[derive(Clone, Default)]
16pub struct MD030ListMarkerSpace {
17    config: MD030Config,
18}
19
20impl MD030ListMarkerSpace {
21    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
22        Self {
23            config: MD030Config {
24                ul_single: crate::types::PositiveUsize::new(ul_single)
25                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
26                ul_multi: crate::types::PositiveUsize::new(ul_multi)
27                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
28                ol_single: crate::types::PositiveUsize::new(ol_single)
29                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
30                ol_multi: crate::types::PositiveUsize::new(ol_multi)
31                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
32            },
33        }
34    }
35
36    pub fn from_config_struct(config: MD030Config) -> Self {
37        Self { config }
38    }
39
40    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
41        match (list_type, is_multi) {
42            (ListType::Unordered, false) => self.config.ul_single.get(),
43            (ListType::Unordered, true) => self.config.ul_multi.get(),
44            (ListType::Ordered, false) => self.config.ol_single.get(),
45            (ListType::Ordered, true) => self.config.ol_multi.get(),
46        }
47    }
48}
49
50impl Rule for MD030ListMarkerSpace {
51    fn name(&self) -> &'static str {
52        "MD030"
53    }
54
55    fn description(&self) -> &'static str {
56        "Spaces after list markers should be consistent"
57    }
58
59    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
60        let mut warnings = Vec::new();
61
62        // Early return if no list content
63        if self.should_skip(ctx) {
64            return Ok(warnings);
65        }
66
67        // Pre-filter lines that are actually list items
68        let mut list_item_lines = Vec::new();
69        for (line_num, line_info) in ctx.lines.iter().enumerate() {
70            if line_info.list_item.is_some() && !line_info.in_code_block {
71                list_item_lines.push(line_num + 1);
72            }
73        }
74
75        // Collect lines once instead of in every is_multi_line_list_item call
76        let lines: Vec<&str> = ctx.content.lines().collect();
77        let mut in_blockquote = false;
78
79        for line_num in list_item_lines {
80            let line = lines[line_num - 1];
81
82            // Skip indented code blocks (4+ spaces or tab)
83            if line.starts_with("    ") || line.starts_with("\t") {
84                continue;
85            }
86
87            // Track blockquotes (for now, just skip lines starting with >)
88            let mut l = line;
89            while l.trim_start().starts_with('>') {
90                l = l.trim_start().trim_start_matches('>').trim_start();
91                in_blockquote = true;
92            }
93            if in_blockquote {
94                in_blockquote = false;
95                continue;
96            }
97
98            // Use pre-computed list item information
99            if let Some(line_info) = ctx.line_info(line_num)
100                && let Some(list_info) = &line_info.list_item
101            {
102                let list_type = if list_info.is_ordered {
103                    ListType::Ordered
104                } else {
105                    ListType::Unordered
106                };
107
108                // Calculate actual spacing after marker
109                let marker_end = list_info.marker_column + list_info.marker.len();
110                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
111
112                // Determine if this is a multi-line list item
113                let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
114                let expected_spaces = self.get_expected_spaces(list_type, is_multi_line);
115
116                // Check for tabs in the spacing
117                let line_content = &line[list_info.marker_column..];
118                let spacing_content = if line_content.len() > list_info.marker.len() {
119                    let after_marker_start = list_info.marker.len();
120                    let after_marker_end = after_marker_start + actual_spaces;
121                    &line_content[after_marker_start..after_marker_end.min(line_content.len())]
122                } else {
123                    ""
124                };
125                let has_tabs = spacing_content.contains('\t');
126
127                // Check if spacing is incorrect or contains tabs
128                if actual_spaces != expected_spaces || has_tabs {
129                    // Calculate precise character range for the problematic spacing
130                    let whitespace_start_pos = marker_end;
131                    let whitespace_len = actual_spaces;
132
133                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
134                    let (start_line, start_col, end_line, end_col) =
135                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
136
137                    // Generate the correct replacement text (just the correct spacing)
138                    let correct_spaces = " ".repeat(expected_spaces);
139
140                    // Calculate byte positions for the fix range
141                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
142                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
143                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
144
145                    let fix = Some(crate::rule::Fix {
146                        range: whitespace_start_byte..whitespace_end_byte,
147                        replacement: correct_spaces,
148                    });
149
150                    // Generate appropriate message
151                    let message =
152                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
153
154                    warnings.push(LintWarning {
155                        rule_name: Some(self.name().to_string()),
156                        severity: Severity::Warning,
157                        line: start_line,
158                        column: start_col,
159                        end_line,
160                        end_column: end_col,
161                        message,
162                        fix,
163                    });
164                }
165            }
166        }
167        Ok(warnings)
168    }
169
170    fn category(&self) -> RuleCategory {
171        RuleCategory::List
172    }
173
174    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
175        if ctx.content.is_empty() {
176            return true;
177        }
178
179        // Fast byte-level check for list markers (including ordered lists)
180        let bytes = ctx.content.as_bytes();
181        !bytes.contains(&b'*')
182            && !bytes.contains(&b'-')
183            && !bytes.contains(&b'+')
184            && !bytes.iter().any(|&b| b.is_ascii_digit())
185    }
186
187    fn as_any(&self) -> &dyn std::any::Any {
188        self
189    }
190
191    fn default_config_section(&self) -> Option<(String, toml::Value)> {
192        let default_config = MD030Config::default();
193        let json_value = serde_json::to_value(&default_config).ok()?;
194        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
195
196        if let toml::Value::Table(table) = toml_value {
197            if !table.is_empty() {
198                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
199            } else {
200                None
201            }
202        } else {
203            None
204        }
205    }
206
207    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
208        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
209        Box::new(Self::from_config_struct(rule_config))
210    }
211
212    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
213        let content = ctx.content;
214
215        // Early return if no fixes needed
216        if self.should_skip(ctx) {
217            return Ok(content.to_string());
218        }
219
220        // DocumentStructure is no longer used for optimization
221        let lines: Vec<&str> = content.lines().collect();
222        let mut result_lines = Vec::with_capacity(lines.len());
223
224        // Pre-compute which lines need potential fixes
225        let mut needs_check = vec![false; lines.len()];
226        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
227            if line_info.list_item.is_some() && !line_info.in_code_block {
228                needs_check[line_idx] = true;
229            }
230        }
231
232        for (line_idx, line) in lines.iter().enumerate() {
233            let line_num = line_idx + 1;
234
235            // Quick check: if this line doesn't need checking, just add it
236            if !needs_check[line_idx] {
237                result_lines.push(line.to_string());
238                continue;
239            }
240
241            // Skip if in front matter
242            // Note: Front matter checking is handled by LintContext directly
243            // No additional front matter check needed here
244
245            // Skip if this is an indented code block (4+ spaces with blank line before)
246            if self.is_indented_code_block(line, line_idx, &lines) {
247                result_lines.push(line.to_string());
248                continue;
249            }
250
251            // Skip blockquotes for now (conservative approach)
252            if line.trim_start().starts_with('>') {
253                result_lines.push(line.to_string());
254                continue;
255            }
256
257            // Try to fix list marker spacing
258            let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
259            if let Some(fixed_line) = self.try_fix_list_marker_spacing_with_context(line, is_multi_line) {
260                result_lines.push(fixed_line);
261            } else {
262                result_lines.push(line.to_string());
263            }
264        }
265
266        // Preserve trailing newline if original content had one
267        let result = result_lines.join("\n");
268        if content.ends_with('\n') && !result.ends_with('\n') {
269            Ok(result + "\n")
270        } else {
271            Ok(result)
272        }
273    }
274}
275
276impl MD030ListMarkerSpace {
277    /// Check if a list item is multi-line (spans multiple lines or contains nested content)
278    fn is_multi_line_list_item(&self, ctx: &crate::lint_context::LintContext, line_num: usize, lines: &[&str]) -> bool {
279        // Get the current list item info
280        let current_line_info = match ctx.line_info(line_num) {
281            Some(info) if info.list_item.is_some() => info,
282            _ => return false,
283        };
284
285        let current_list = current_line_info.list_item.as_ref().unwrap();
286
287        // Check subsequent lines to see if they are continuation of this list item
288        for next_line_num in (line_num + 1)..=lines.len() {
289            if let Some(next_line_info) = ctx.line_info(next_line_num) {
290                // If we encounter another list item at the same or higher level, this item is done
291                if let Some(next_list) = &next_line_info.list_item {
292                    if next_list.marker_column <= current_list.marker_column {
293                        break; // Found the next list item at same/higher level
294                    }
295                    // If there's a nested list item, this is multi-line
296                    return true;
297                }
298
299                // If we encounter a non-empty line that's not indented enough to be part of this list item,
300                // this list item is done
301                let line_content = lines.get(next_line_num - 1).unwrap_or(&"");
302                if !line_content.trim().is_empty() {
303                    let expected_continuation_indent = current_list.content_column;
304                    let actual_indent = line_content.len() - line_content.trim_start().len();
305
306                    if actual_indent < expected_continuation_indent {
307                        break; // Line is not indented enough to be part of this list item
308                    }
309
310                    // If we find a continuation line, this is multi-line
311                    if actual_indent >= expected_continuation_indent {
312                        return true;
313                    }
314                }
315
316                // Empty lines don't affect the multi-line status by themselves
317            }
318        }
319
320        false
321    }
322
323    /// Helper to fix marker spacing for both ordered and unordered lists
324    fn fix_marker_spacing(
325        &self,
326        marker: &str,
327        after_marker: &str,
328        indent: &str,
329        is_multi_line: bool,
330        is_ordered: bool,
331    ) -> Option<String> {
332        // Fix if there are tabs, multiple spaces, or mixed whitespace
333        if after_marker.starts_with('\t')
334            || after_marker.starts_with("  ")
335            || (after_marker.starts_with(' ') && after_marker.as_bytes().get(1) == Some(&b'\t'))
336        {
337            let content = after_marker.trim_start();
338            if !content.is_empty() {
339                // Use appropriate configuration based on list type and whether it's multi-line
340                let spaces = if is_ordered {
341                    if is_multi_line {
342                        " ".repeat(self.config.ol_multi.get())
343                    } else {
344                        " ".repeat(self.config.ol_single.get())
345                    }
346                } else if is_multi_line {
347                    " ".repeat(self.config.ul_multi.get())
348                } else {
349                    " ".repeat(self.config.ul_single.get())
350                };
351                return Some(format!("{indent}{marker}{spaces}{content}"));
352            }
353        }
354        None
355    }
356
357    /// Fix list marker spacing with context - handles tabs, multiple spaces, and mixed whitespace
358    fn try_fix_list_marker_spacing_with_context(&self, line: &str, is_multi_line: bool) -> Option<String> {
359        let trimmed = line.trim_start();
360        let indent = &line[..line.len() - trimmed.len()];
361
362        // Check for unordered list markers
363        for marker in &["*", "-", "+"] {
364            if let Some(after_marker) = trimmed.strip_prefix(marker) {
365                if let Some(fixed) = self.fix_marker_spacing(marker, after_marker, indent, is_multi_line, false) {
366                    return Some(fixed);
367                }
368                break; // Found a marker, don't check others
369            }
370        }
371
372        // Check for ordered list markers
373        if let Some(dot_pos) = trimmed.find('.') {
374            let before_dot = &trimmed[..dot_pos];
375            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
376                let after_dot = &trimmed[dot_pos + 1..];
377                let marker = format!("{before_dot}.");
378                if let Some(fixed) = self.fix_marker_spacing(&marker, after_dot, indent, is_multi_line, true) {
379                    return Some(fixed);
380                }
381            }
382        }
383
384        None
385    }
386
387    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
388    /// (Legacy method for backward compatibility - defaults to single-line behavior)
389    /// Check if a line is part of an indented code block (4+ spaces with blank line before)
390    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
391        // Must start with 4+ spaces or tab
392        if !line.starts_with("    ") && !line.starts_with('\t') {
393            return false;
394        }
395
396        // If it's the first line, it's not an indented code block
397        if line_idx == 0 {
398            return false;
399        }
400
401        // Check if there's a blank line before this line or before the start of the indented block
402        if self.has_blank_line_before_indented_block(line_idx, lines) {
403            return true;
404        }
405
406        false
407    }
408
409    /// Check if there's a blank line before the start of an indented block
410    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
411        // Walk backwards to find the start of the indented block
412        let mut current_idx = line_idx;
413
414        // Find the first line in this indented block
415        while current_idx > 0 {
416            let current_line = lines[current_idx];
417            let prev_line = lines[current_idx - 1];
418
419            // If current line is not indented, we've gone too far
420            if !current_line.starts_with("    ") && !current_line.starts_with('\t') {
421                break;
422            }
423
424            // If previous line is not indented, check if it's blank
425            if !prev_line.starts_with("    ") && !prev_line.starts_with('\t') {
426                return prev_line.trim().is_empty();
427            }
428
429            current_idx -= 1;
430        }
431
432        false
433    }
434}
435
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the default-configured rule over `content` and returns its warnings.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD030ListMarkerSpace::default().check(&ctx).unwrap()
    }

    #[test]
    fn test_basic_functionality() {
        // Well-formed lists produce no warnings.
        let clean = lint("* Item 1\n* Item 2\n  * Nested item\n1. Ordered item");
        assert!(
            clean.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );

        // Two of the three items have too many spaces after the marker.
        let flagged = lint("*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)");
        assert_eq!(
            flagged.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );

        for warning in flagged {
            let well_formed = warning.message.starts_with("Spaces after list markers (Expected:")
                && warning.message.contains("Actual:");
            assert!(
                well_formed,
                "Warning message should include expected and actual values, got: '{}'",
                warning.message
            );
        }
    }
}