rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
10use crate::utils::range_utils::calculate_match_range;
11use toml;
12
13mod md030_config;
14use md030_config::MD030Config;
15
16#[derive(Clone, Default)]
17pub struct MD030ListMarkerSpace {
18    config: MD030Config,
19}
20
21impl MD030ListMarkerSpace {
22    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
23        Self {
24            config: MD030Config {
25                ul_single,
26                ul_multi,
27                ol_single,
28                ol_multi,
29            },
30        }
31    }
32
33    pub fn from_config_struct(config: MD030Config) -> Self {
34        Self { config }
35    }
36
37    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
38        match (list_type, is_multi) {
39            (ListType::Unordered, false) => self.config.ul_single,
40            (ListType::Unordered, true) => self.config.ul_multi,
41            (ListType::Ordered, false) => self.config.ol_single,
42            (ListType::Ordered, true) => self.config.ol_multi,
43        }
44    }
45}
46
47impl Rule for MD030ListMarkerSpace {
48    fn name(&self) -> &'static str {
49        "MD030"
50    }
51
52    fn description(&self) -> &'static str {
53        "Spaces after list markers should be consistent"
54    }
55
56    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
57        let mut warnings = Vec::new();
58        let lines: Vec<String> = ctx.content.lines().map(|l| l.to_string()).collect();
59        let mut in_blockquote = false;
60        for (i, line) in lines.iter().enumerate() {
61            let line_num = i + 1;
62
63            // Skip if in code block
64            if let Some(line_info) = ctx.line_info(line_num)
65                && line_info.in_code_block
66            {
67                continue;
68            }
69            // Skip indented code blocks (4+ spaces or tab)
70            if line.starts_with("    ") || line.starts_with("\t") {
71                continue;
72            }
73            // Track blockquotes (for now, just skip lines starting with >)
74            let mut l = line.as_str();
75            while l.trim_start().starts_with('>') {
76                l = l.trim_start().trim_start_matches('>').trim_start();
77                in_blockquote = true;
78            }
79            if in_blockquote {
80                in_blockquote = false;
81                continue;
82            }
83            // Use pre-computed list item information
84            if let Some(line_info) = ctx.line_info(line_num)
85                && let Some(list_info) = &line_info.list_item
86            {
87                let list_type = if list_info.is_ordered {
88                    ListType::Ordered
89                } else {
90                    ListType::Unordered
91                };
92
93                // Calculate actual spacing after marker
94                let marker_end = list_info.marker_column + list_info.marker.len();
95                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
96
97                // Determine if this is a multi-line list item
98                let is_multi_line = self.is_multi_line_list_item(ctx, line_num);
99                let expected_spaces = self.get_expected_spaces(list_type, is_multi_line);
100
101                // Check for tabs in the spacing
102                let line_content = &line[list_info.marker_column..];
103                let spacing_content = if line_content.len() > list_info.marker.len() {
104                    let after_marker_start = list_info.marker.len();
105                    let after_marker_end = after_marker_start + actual_spaces;
106                    &line_content[after_marker_start..after_marker_end.min(line_content.len())]
107                } else {
108                    ""
109                };
110                let has_tabs = spacing_content.contains('\t');
111
112                // Check if spacing is incorrect or contains tabs
113                if actual_spaces != expected_spaces || has_tabs {
114                    // Calculate precise character range for the problematic spacing
115                    let whitespace_start_pos = marker_end;
116                    let whitespace_len = actual_spaces;
117
118                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
119                    let (start_line, start_col, end_line, end_col) =
120                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
121
122                    // Generate the correct replacement text (just the correct spacing)
123                    let correct_spaces = " ".repeat(expected_spaces);
124
125                    // Calculate byte positions for the fix range
126                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
127                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
128                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
129
130                    let fix = Some(crate::rule::Fix {
131                        range: whitespace_start_byte..whitespace_end_byte,
132                        replacement: correct_spaces,
133                    });
134
135                    // Generate appropriate message
136                    let message =
137                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
138
139                    warnings.push(LintWarning {
140                        rule_name: Some(self.name()),
141                        severity: Severity::Warning,
142                        line: start_line,
143                        column: start_col,
144                        end_line,
145                        end_column: end_col,
146                        message,
147                        fix,
148                    });
149                }
150            }
151        }
152        Ok(warnings)
153    }
154
155    fn category(&self) -> RuleCategory {
156        RuleCategory::List
157    }
158
159    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
160        ctx.content.is_empty()
161            || (!ctx.content.contains('*')
162                && !ctx.content.contains('-')
163                && !ctx.content.contains('+')
164                && !ctx.content.contains(|c: char| c.is_ascii_digit()))
165    }
166
167    fn as_any(&self) -> &dyn std::any::Any {
168        self
169    }
170
171    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
172        Some(self)
173    }
174
175    fn default_config_section(&self) -> Option<(String, toml::Value)> {
176        let default_config = MD030Config::default();
177        let json_value = serde_json::to_value(&default_config).ok()?;
178        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
179
180        if let toml::Value::Table(table) = toml_value {
181            if !table.is_empty() {
182                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
183            } else {
184                None
185            }
186        } else {
187            None
188        }
189    }
190
191    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
192        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
193        Box::new(Self::from_config_struct(rule_config))
194    }
195
196    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
197        let content = ctx.content;
198        let structure = crate::utils::document_structure::DocumentStructure::new(content);
199        let lines: Vec<&str> = content.lines().collect();
200        let mut result_lines = Vec::new();
201
202        for (line_idx, line) in lines.iter().enumerate() {
203            let line_num = line_idx + 1;
204
205            // Skip if in code block
206            if let Some(line_info) = ctx.line_info(line_num)
207                && line_info.in_code_block
208            {
209                result_lines.push(line.to_string());
210                continue;
211            }
212
213            // Skip if in front matter
214            if structure.is_in_front_matter(line_num) {
215                result_lines.push(line.to_string());
216                continue;
217            }
218
219            // Skip if this is an indented code block (4+ spaces with blank line before)
220            if self.is_indented_code_block(line, line_idx, &lines) {
221                result_lines.push(line.to_string());
222                continue;
223            }
224
225            // Skip blockquotes for now (conservative approach)
226            if line.trim_start().starts_with('>') {
227                result_lines.push(line.to_string());
228                continue;
229            }
230
231            // Try to fix list marker spacing
232            let is_multi_line = self.is_multi_line_list_item(ctx, line_num);
233            if let Some(fixed_line) = self.try_fix_list_marker_spacing_with_context(line, is_multi_line) {
234                result_lines.push(fixed_line);
235            } else {
236                result_lines.push(line.to_string());
237            }
238        }
239
240        // Preserve trailing newline if original content had one
241        let result = result_lines.join("\n");
242        if content.ends_with('\n') && !result.ends_with('\n') {
243            Ok(result + "\n")
244        } else {
245            Ok(result)
246        }
247    }
248}
249
250impl MD030ListMarkerSpace {
251    /// Check if a list item is multi-line (spans multiple lines or contains nested content)
252    fn is_multi_line_list_item(&self, ctx: &crate::lint_context::LintContext, line_num: usize) -> bool {
253        // Get the current list item info
254        let current_line_info = match ctx.line_info(line_num) {
255            Some(info) if info.list_item.is_some() => info,
256            _ => return false,
257        };
258
259        let current_list = current_line_info.list_item.as_ref().unwrap();
260        let lines: Vec<&str> = ctx.content.lines().collect();
261
262        // Check subsequent lines to see if they are continuation of this list item
263        for next_line_num in (line_num + 1)..=lines.len() {
264            if let Some(next_line_info) = ctx.line_info(next_line_num) {
265                // If we encounter another list item at the same or higher level, this item is done
266                if let Some(next_list) = &next_line_info.list_item {
267                    if next_list.marker_column <= current_list.marker_column {
268                        break; // Found the next list item at same/higher level
269                    }
270                    // If there's a nested list item, this is multi-line
271                    return true;
272                }
273
274                // If we encounter a non-empty line that's not indented enough to be part of this list item,
275                // this list item is done
276                let line_content = lines.get(next_line_num - 1).unwrap_or(&"");
277                if !line_content.trim().is_empty() {
278                    let expected_continuation_indent = current_list.content_column;
279                    let actual_indent = line_content.len() - line_content.trim_start().len();
280
281                    if actual_indent < expected_continuation_indent {
282                        break; // Line is not indented enough to be part of this list item
283                    }
284
285                    // If we find a continuation line, this is multi-line
286                    if actual_indent >= expected_continuation_indent {
287                        return true;
288                    }
289                }
290
291                // Empty lines don't affect the multi-line status by themselves
292            }
293        }
294
295        false
296    }
297
298    /// Fix list marker spacing with context - handles tabs, multiple spaces, and mixed whitespace
299    fn try_fix_list_marker_spacing_with_context(&self, line: &str, is_multi_line: bool) -> Option<String> {
300        let trimmed = line.trim_start();
301        let indent = &line[..line.len() - trimmed.len()];
302
303        // Check for unordered list markers
304        for marker in &["*", "-", "+"] {
305            if let Some(after_marker) = trimmed.strip_prefix(marker) {
306                // Fix if there are tabs, multiple spaces, or mixed whitespace
307                if after_marker.starts_with('\t')
308                    || after_marker.starts_with("  ")
309                    || (after_marker.starts_with(' ') && after_marker.as_bytes().get(1) == Some(&b'\t'))
310                {
311                    let content = after_marker.trim_start();
312                    if !content.is_empty() {
313                        // Use appropriate configuration based on whether it's multi-line
314                        let spaces = if is_multi_line {
315                            " ".repeat(self.config.ul_multi)
316                        } else {
317                            " ".repeat(self.config.ul_single)
318                        };
319                        return Some(format!("{indent}{marker}{spaces}{content}"));
320                    }
321                }
322                break; // Found a marker, don't check others
323            }
324        }
325
326        // Check for ordered list markers
327        if let Some(dot_pos) = trimmed.find('.') {
328            let before_dot = &trimmed[..dot_pos];
329            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
330                let after_dot = &trimmed[dot_pos + 1..];
331                // Fix if there are tabs, multiple spaces, or mixed whitespace
332                if after_dot.starts_with('\t')
333                    || after_dot.starts_with("  ")
334                    || (after_dot.starts_with(' ') && after_dot.as_bytes().get(1) == Some(&b'\t'))
335                {
336                    let content = after_dot.trim_start();
337                    if !content.is_empty() {
338                        // Use appropriate configuration based on whether it's multi-line
339                        let spaces = if is_multi_line {
340                            " ".repeat(self.config.ol_multi)
341                        } else {
342                            " ".repeat(self.config.ol_single)
343                        };
344                        return Some(format!("{indent}{before_dot}.{spaces}{content}"));
345                    }
346                }
347            }
348        }
349
350        None
351    }
352
353    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
354    /// (Legacy method for backward compatibility - defaults to single-line behavior)
355    /// Check if a line is part of an indented code block (4+ spaces with blank line before)
356    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
357        // Must start with 4+ spaces or tab
358        if !line.starts_with("    ") && !line.starts_with('\t') {
359            return false;
360        }
361
362        // If it's the first line, it's not an indented code block
363        if line_idx == 0 {
364            return false;
365        }
366
367        // Check if there's a blank line before this line or before the start of the indented block
368        if self.has_blank_line_before_indented_block(line_idx, lines) {
369            return true;
370        }
371
372        false
373    }
374
375    /// Check if there's a blank line before the start of an indented block
376    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
377        // Walk backwards to find the start of the indented block
378        let mut current_idx = line_idx;
379
380        // Find the first line in this indented block
381        while current_idx > 0 {
382            let current_line = lines[current_idx];
383            let prev_line = lines[current_idx - 1];
384
385            // If current line is not indented, we've gone too far
386            if !current_line.starts_with("    ") && !current_line.starts_with('\t') {
387                break;
388            }
389
390            // If previous line is not indented, check if it's blank
391            if !prev_line.starts_with("    ") && !prev_line.starts_with('\t') {
392                return prev_line.trim().is_empty();
393            }
394
395            current_idx -= 1;
396        }
397
398        false
399    }
400}
401
402impl DocumentStructureExtensions for MD030ListMarkerSpace {
403    fn has_relevant_elements(
404        &self,
405        _ctx: &crate::lint_context::LintContext,
406        doc_structure: &DocumentStructure,
407    ) -> bool {
408        !doc_structure.list_lines.is_empty()
409    }
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Correctly spaced lists produce no warnings; over-spaced markers are
    /// reported with a message naming the expected and actual widths.
    #[test]
    fn test_with_document_structure() {
        let rule = MD030ListMarkerSpace::default();

        // Lints `content` with the standard flavor and returns the warnings.
        let lint = |content: &str| {
            let structure = DocumentStructure::new(content);
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
            rule.check_with_structure(&ctx, &structure).unwrap()
        };

        let clean = lint("* Item 1\n* Item 2\n  * Nested item\n1. Ordered item");
        assert!(
            clean.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );

        // Expect warnings for lines with too many spaces after the marker
        let flagged = lint("*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)");
        assert_eq!(
            flagged.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );

        for warning in &flagged {
            let message = &warning.message;
            assert!(
                message.starts_with("Spaces after list markers (Expected:") && message.contains("Actual:"),
                "Warning message should include expected and actual values, got: '{}'",
                message
            );
        }
    }
}