rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::range_utils::calculate_match_range;
10use toml;
11
12mod md030_config;
13use md030_config::MD030Config;
14
15#[derive(Clone, Default)]
16pub struct MD030ListMarkerSpace {
17    config: MD030Config,
18}
19
20impl MD030ListMarkerSpace {
21    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
22        Self {
23            config: MD030Config {
24                ul_single,
25                ul_multi,
26                ol_single,
27                ol_multi,
28            },
29        }
30    }
31
32    pub fn from_config_struct(config: MD030Config) -> Self {
33        Self { config }
34    }
35
36    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
37        match (list_type, is_multi) {
38            (ListType::Unordered, false) => self.config.ul_single,
39            (ListType::Unordered, true) => self.config.ul_multi,
40            (ListType::Ordered, false) => self.config.ol_single,
41            (ListType::Ordered, true) => self.config.ol_multi,
42        }
43    }
44}
45
46impl Rule for MD030ListMarkerSpace {
47    fn name(&self) -> &'static str {
48        "MD030"
49    }
50
51    fn description(&self) -> &'static str {
52        "Spaces after list markers should be consistent"
53    }
54
55    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
56        let mut warnings = Vec::new();
57
58        // Early return if no list content
59        if self.should_skip(ctx) {
60            return Ok(warnings);
61        }
62
63        // Pre-filter lines that are actually list items
64        let mut list_item_lines = Vec::new();
65        for (line_num, line_info) in ctx.lines.iter().enumerate() {
66            if line_info.list_item.is_some() && !line_info.in_code_block {
67                list_item_lines.push(line_num + 1);
68            }
69        }
70
71        // Collect lines once instead of in every is_multi_line_list_item call
72        let lines: Vec<&str> = ctx.content.lines().collect();
73        let mut in_blockquote = false;
74
75        for line_num in list_item_lines {
76            let line = lines[line_num - 1];
77
78            // Skip indented code blocks (4+ spaces or tab)
79            if line.starts_with("    ") || line.starts_with("\t") {
80                continue;
81            }
82
83            // Track blockquotes (for now, just skip lines starting with >)
84            let mut l = line;
85            while l.trim_start().starts_with('>') {
86                l = l.trim_start().trim_start_matches('>').trim_start();
87                in_blockquote = true;
88            }
89            if in_blockquote {
90                in_blockquote = false;
91                continue;
92            }
93
94            // Use pre-computed list item information
95            if let Some(line_info) = ctx.line_info(line_num)
96                && let Some(list_info) = &line_info.list_item
97            {
98                let list_type = if list_info.is_ordered {
99                    ListType::Ordered
100                } else {
101                    ListType::Unordered
102                };
103
104                // Calculate actual spacing after marker
105                let marker_end = list_info.marker_column + list_info.marker.len();
106                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
107
108                // Determine if this is a multi-line list item
109                let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
110                let expected_spaces = self.get_expected_spaces(list_type, is_multi_line);
111
112                // Check for tabs in the spacing
113                let line_content = &line[list_info.marker_column..];
114                let spacing_content = if line_content.len() > list_info.marker.len() {
115                    let after_marker_start = list_info.marker.len();
116                    let after_marker_end = after_marker_start + actual_spaces;
117                    &line_content[after_marker_start..after_marker_end.min(line_content.len())]
118                } else {
119                    ""
120                };
121                let has_tabs = spacing_content.contains('\t');
122
123                // Check if spacing is incorrect or contains tabs
124                if actual_spaces != expected_spaces || has_tabs {
125                    // Calculate precise character range for the problematic spacing
126                    let whitespace_start_pos = marker_end;
127                    let whitespace_len = actual_spaces;
128
129                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
130                    let (start_line, start_col, end_line, end_col) =
131                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
132
133                    // Generate the correct replacement text (just the correct spacing)
134                    let correct_spaces = " ".repeat(expected_spaces);
135
136                    // Calculate byte positions for the fix range
137                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
138                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
139                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
140
141                    let fix = Some(crate::rule::Fix {
142                        range: whitespace_start_byte..whitespace_end_byte,
143                        replacement: correct_spaces,
144                    });
145
146                    // Generate appropriate message
147                    let message =
148                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
149
150                    warnings.push(LintWarning {
151                        rule_name: Some(self.name()),
152                        severity: Severity::Warning,
153                        line: start_line,
154                        column: start_col,
155                        end_line,
156                        end_column: end_col,
157                        message,
158                        fix,
159                    });
160                }
161            }
162        }
163        Ok(warnings)
164    }
165
166    fn category(&self) -> RuleCategory {
167        RuleCategory::List
168    }
169
170    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
171        if ctx.content.is_empty() {
172            return true;
173        }
174
175        // Fast byte-level check for list markers (including ordered lists)
176        let bytes = ctx.content.as_bytes();
177        !bytes.contains(&b'*')
178            && !bytes.contains(&b'-')
179            && !bytes.contains(&b'+')
180            && !bytes.iter().any(|&b| b.is_ascii_digit())
181    }
182
183    fn as_any(&self) -> &dyn std::any::Any {
184        self
185    }
186
187    fn default_config_section(&self) -> Option<(String, toml::Value)> {
188        let default_config = MD030Config::default();
189        let json_value = serde_json::to_value(&default_config).ok()?;
190        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
191
192        if let toml::Value::Table(table) = toml_value {
193            if !table.is_empty() {
194                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
195            } else {
196                None
197            }
198        } else {
199            None
200        }
201    }
202
203    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
204        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
205        Box::new(Self::from_config_struct(rule_config))
206    }
207
208    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
209        let content = ctx.content;
210
211        // Early return if no fixes needed
212        if self.should_skip(ctx) {
213            return Ok(content.to_string());
214        }
215
216        // DocumentStructure is no longer used for optimization
217        let lines: Vec<&str> = content.lines().collect();
218        let mut result_lines = Vec::with_capacity(lines.len());
219
220        // Pre-compute which lines need potential fixes
221        let mut needs_check = vec![false; lines.len()];
222        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
223            if line_info.list_item.is_some() && !line_info.in_code_block {
224                needs_check[line_idx] = true;
225            }
226        }
227
228        for (line_idx, line) in lines.iter().enumerate() {
229            let line_num = line_idx + 1;
230
231            // Quick check: if this line doesn't need checking, just add it
232            if !needs_check[line_idx] {
233                result_lines.push(line.to_string());
234                continue;
235            }
236
237            // Skip if in front matter
238            // Note: Front matter checking is handled by LintContext directly
239            // No additional front matter check needed here
240
241            // Skip if this is an indented code block (4+ spaces with blank line before)
242            if self.is_indented_code_block(line, line_idx, &lines) {
243                result_lines.push(line.to_string());
244                continue;
245            }
246
247            // Skip blockquotes for now (conservative approach)
248            if line.trim_start().starts_with('>') {
249                result_lines.push(line.to_string());
250                continue;
251            }
252
253            // Try to fix list marker spacing
254            let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
255            if let Some(fixed_line) = self.try_fix_list_marker_spacing_with_context(line, is_multi_line) {
256                result_lines.push(fixed_line);
257            } else {
258                result_lines.push(line.to_string());
259            }
260        }
261
262        // Preserve trailing newline if original content had one
263        let result = result_lines.join("\n");
264        if content.ends_with('\n') && !result.ends_with('\n') {
265            Ok(result + "\n")
266        } else {
267            Ok(result)
268        }
269    }
270}
271
272impl MD030ListMarkerSpace {
273    /// Check if a list item is multi-line (spans multiple lines or contains nested content)
274    fn is_multi_line_list_item(&self, ctx: &crate::lint_context::LintContext, line_num: usize, lines: &[&str]) -> bool {
275        // Get the current list item info
276        let current_line_info = match ctx.line_info(line_num) {
277            Some(info) if info.list_item.is_some() => info,
278            _ => return false,
279        };
280
281        let current_list = current_line_info.list_item.as_ref().unwrap();
282
283        // Check subsequent lines to see if they are continuation of this list item
284        for next_line_num in (line_num + 1)..=lines.len() {
285            if let Some(next_line_info) = ctx.line_info(next_line_num) {
286                // If we encounter another list item at the same or higher level, this item is done
287                if let Some(next_list) = &next_line_info.list_item {
288                    if next_list.marker_column <= current_list.marker_column {
289                        break; // Found the next list item at same/higher level
290                    }
291                    // If there's a nested list item, this is multi-line
292                    return true;
293                }
294
295                // If we encounter a non-empty line that's not indented enough to be part of this list item,
296                // this list item is done
297                let line_content = lines.get(next_line_num - 1).unwrap_or(&"");
298                if !line_content.trim().is_empty() {
299                    let expected_continuation_indent = current_list.content_column;
300                    let actual_indent = line_content.len() - line_content.trim_start().len();
301
302                    if actual_indent < expected_continuation_indent {
303                        break; // Line is not indented enough to be part of this list item
304                    }
305
306                    // If we find a continuation line, this is multi-line
307                    if actual_indent >= expected_continuation_indent {
308                        return true;
309                    }
310                }
311
312                // Empty lines don't affect the multi-line status by themselves
313            }
314        }
315
316        false
317    }
318
319    /// Fix list marker spacing with context - handles tabs, multiple spaces, and mixed whitespace
320    fn try_fix_list_marker_spacing_with_context(&self, line: &str, is_multi_line: bool) -> Option<String> {
321        let trimmed = line.trim_start();
322        let indent = &line[..line.len() - trimmed.len()];
323
324        // Check for unordered list markers
325        for marker in &["*", "-", "+"] {
326            if let Some(after_marker) = trimmed.strip_prefix(marker) {
327                // Fix if there are tabs, multiple spaces, or mixed whitespace
328                if after_marker.starts_with('\t')
329                    || after_marker.starts_with("  ")
330                    || (after_marker.starts_with(' ') && after_marker.as_bytes().get(1) == Some(&b'\t'))
331                {
332                    let content = after_marker.trim_start();
333                    if !content.is_empty() {
334                        // Use appropriate configuration based on whether it's multi-line
335                        let spaces = if is_multi_line {
336                            " ".repeat(self.config.ul_multi)
337                        } else {
338                            " ".repeat(self.config.ul_single)
339                        };
340                        return Some(format!("{indent}{marker}{spaces}{content}"));
341                    }
342                }
343                break; // Found a marker, don't check others
344            }
345        }
346
347        // Check for ordered list markers
348        if let Some(dot_pos) = trimmed.find('.') {
349            let before_dot = &trimmed[..dot_pos];
350            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
351                let after_dot = &trimmed[dot_pos + 1..];
352                // Fix if there are tabs, multiple spaces, or mixed whitespace
353                if after_dot.starts_with('\t')
354                    || after_dot.starts_with("  ")
355                    || (after_dot.starts_with(' ') && after_dot.as_bytes().get(1) == Some(&b'\t'))
356                {
357                    let content = after_dot.trim_start();
358                    if !content.is_empty() {
359                        // Use appropriate configuration based on whether it's multi-line
360                        let spaces = if is_multi_line {
361                            " ".repeat(self.config.ol_multi)
362                        } else {
363                            " ".repeat(self.config.ol_single)
364                        };
365                        return Some(format!("{indent}{before_dot}.{spaces}{content}"));
366                    }
367                }
368            }
369        }
370
371        None
372    }
373
374    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
375    /// (Legacy method for backward compatibility - defaults to single-line behavior)
376    /// Check if a line is part of an indented code block (4+ spaces with blank line before)
377    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
378        // Must start with 4+ spaces or tab
379        if !line.starts_with("    ") && !line.starts_with('\t') {
380            return false;
381        }
382
383        // If it's the first line, it's not an indented code block
384        if line_idx == 0 {
385            return false;
386        }
387
388        // Check if there's a blank line before this line or before the start of the indented block
389        if self.has_blank_line_before_indented_block(line_idx, lines) {
390            return true;
391        }
392
393        false
394    }
395
396    /// Check if there's a blank line before the start of an indented block
397    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
398        // Walk backwards to find the start of the indented block
399        let mut current_idx = line_idx;
400
401        // Find the first line in this indented block
402        while current_idx > 0 {
403            let current_line = lines[current_idx];
404            let prev_line = lines[current_idx - 1];
405
406            // If current line is not indented, we've gone too far
407            if !current_line.starts_with("    ") && !current_line.starts_with('\t') {
408                break;
409            }
410
411            // If previous line is not indented, check if it's blank
412            if !prev_line.starts_with("    ") && !prev_line.starts_with('\t') {
413                return prev_line.trim().is_empty();
414            }
415
416            current_idx -= 1;
417        }
418
419        false
420    }
421}
422
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Well-spaced lists produce no warnings; over-spaced markers are each
    /// reported with a message naming the expected and actual widths.
    #[test]
    fn test_basic_functionality() {
        let rule = MD030ListMarkerSpace::default();
        // Lints `content` with the default rule and returns its warnings.
        let lint = |content: &str| {
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
            rule.check(&ctx).unwrap()
        };

        let well_spaced = lint("* Item 1\n* Item 2\n  * Nested item\n1. Ordered item");
        assert!(
            well_spaced.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );

        // Two of the three lines have too many spaces after the marker.
        let over_spaced = lint("*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)");
        assert_eq!(
            over_spaced.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );

        for warning in over_spaced {
            let has_prefix = warning.message.starts_with("Spaces after list markers (Expected:");
            let has_actual = warning.message.contains("Actual:");
            assert!(
                has_prefix && has_actual,
                "Warning message should include expected and actual values, got: '{}'",
                warning.message
            );
        }
    }
}
456}