rumdl_lib/rules/
md030_list_marker_space.rs

1//!
2//! Rule MD030: Spaces after list markers
3//!
4//! See [docs/md030.md](../../docs/md030.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::rule_config_serde::RuleConfig;
8use crate::rules::list_utils::ListType;
9use crate::utils::element_cache::ElementCache;
10use crate::utils::range_utils::calculate_match_range;
11use toml;
12
13mod md030_config;
14use md030_config::MD030Config;
15
/// Rule MD030: checks the number of spaces between a list marker and the item content.
///
/// Wraps an [`MD030Config`], which carries the expected spacing for unordered and ordered
/// lists, each with a single-line and a multi-line value.
#[derive(Clone, Default)]
pub struct MD030ListMarkerSpace {
    // Expected spacing values (`ul_single`, `ul_multi`, `ol_single`, `ol_multi`).
    config: MD030Config,
}
20
21impl MD030ListMarkerSpace {
22    pub fn new(ul_single: usize, ul_multi: usize, ol_single: usize, ol_multi: usize) -> Self {
23        Self {
24            config: MD030Config {
25                ul_single: crate::types::PositiveUsize::new(ul_single)
26                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
27                ul_multi: crate::types::PositiveUsize::new(ul_multi)
28                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
29                ol_single: crate::types::PositiveUsize::new(ol_single)
30                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
31                ol_multi: crate::types::PositiveUsize::new(ol_multi)
32                    .unwrap_or(crate::types::PositiveUsize::from_const(1)),
33            },
34        }
35    }
36
37    pub fn from_config_struct(config: MD030Config) -> Self {
38        Self { config }
39    }
40
41    pub fn get_expected_spaces(&self, list_type: ListType, is_multi: bool) -> usize {
42        match (list_type, is_multi) {
43            (ListType::Unordered, false) => self.config.ul_single.get(),
44            (ListType::Unordered, true) => self.config.ul_multi.get(),
45            (ListType::Ordered, false) => self.config.ol_single.get(),
46            (ListType::Ordered, true) => self.config.ol_multi.get(),
47        }
48    }
49}
50
51impl Rule for MD030ListMarkerSpace {
    /// Returns the rule identifier.
    fn name(&self) -> &'static str {
        "MD030"
    }
55
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Spaces after list markers should be consistent"
    }
59
60    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
61        let mut warnings = Vec::new();
62
63        // Early return if no list content
64        if self.should_skip(ctx) {
65            return Ok(warnings);
66        }
67
68        // Pre-filter lines that are actually list items
69        let mut list_item_lines = Vec::new();
70        for (line_num, line_info) in ctx.lines.iter().enumerate() {
71            if line_info.list_item.is_some() && !line_info.in_code_block {
72                list_item_lines.push(line_num + 1);
73            }
74        }
75
76        // Collect lines once instead of in every is_multi_line_list_item call
77        let lines: Vec<&str> = ctx.content.lines().collect();
78
79        for line_num in list_item_lines {
80            let line = lines[line_num - 1];
81
82            // Skip indented code blocks (4+ columns accounting for tab expansion)
83            if ElementCache::calculate_indentation_width_default(line) >= 4 {
84                continue;
85            }
86
87            // Use pre-computed list item information
88            // LintContext already handles blockquotes by stripping prefixes and
89            // storing correct marker_column/content_column values
90            if let Some(line_info) = ctx.line_info(line_num)
91                && let Some(list_info) = &line_info.list_item
92            {
93                let list_type = if list_info.is_ordered {
94                    ListType::Ordered
95                } else {
96                    ListType::Unordered
97                };
98
99                // Calculate actual spacing after marker
100                let marker_end = list_info.marker_column + list_info.marker.len();
101                let actual_spaces = list_info.content_column.saturating_sub(marker_end);
102
103                // Determine if this is a multi-line list item
104                let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
105                let expected_spaces = self.get_expected_spaces(list_type, is_multi_line);
106
107                // MD030 only checks for incorrect number of spaces, not tabs
108                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
109                // Check if spacing is incorrect
110                if actual_spaces != expected_spaces {
111                    // Calculate precise character range for the problematic spacing
112                    let whitespace_start_pos = marker_end;
113                    let whitespace_len = actual_spaces;
114
115                    // Calculate the range that needs to be replaced (the entire whitespace after marker)
116                    let (start_line, start_col, end_line, end_col) =
117                        calculate_match_range(line_num, line, whitespace_start_pos, whitespace_len);
118
119                    // Generate the correct replacement text (just the correct spacing)
120                    let correct_spaces = " ".repeat(expected_spaces);
121
122                    // Calculate byte positions for the fix range
123                    let line_start_byte = ctx.line_offsets.get(line_num - 1).copied().unwrap_or(0);
124                    let whitespace_start_byte = line_start_byte + whitespace_start_pos;
125                    let whitespace_end_byte = whitespace_start_byte + whitespace_len;
126
127                    let fix = Some(crate::rule::Fix {
128                        range: whitespace_start_byte..whitespace_end_byte,
129                        replacement: correct_spaces,
130                    });
131
132                    // Generate appropriate message
133                    let message =
134                        format!("Spaces after list markers (Expected: {expected_spaces}; Actual: {actual_spaces})");
135
136                    warnings.push(LintWarning {
137                        rule_name: Some(self.name().to_string()),
138                        severity: Severity::Warning,
139                        line: start_line,
140                        column: start_col,
141                        end_line,
142                        end_column: end_col,
143                        message,
144                        fix,
145                    });
146                }
147            }
148        }
149        Ok(warnings)
150    }
151
    /// Returns the category this rule is filed under (`RuleCategory::List`).
    fn category(&self) -> RuleCategory {
        RuleCategory::List
    }
155
156    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
157        if ctx.content.is_empty() {
158            return true;
159        }
160
161        // Fast byte-level check for list markers (including ordered lists)
162        let bytes = ctx.content.as_bytes();
163        !bytes.contains(&b'*')
164            && !bytes.contains(&b'-')
165            && !bytes.contains(&b'+')
166            && !bytes.iter().any(|&b| b.is_ascii_digit())
167    }
168
    /// Exposes the rule as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
172
173    fn default_config_section(&self) -> Option<(String, toml::Value)> {
174        let default_config = MD030Config::default();
175        let json_value = serde_json::to_value(&default_config).ok()?;
176        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
177
178        if let toml::Value::Table(table) = toml_value {
179            if !table.is_empty() {
180                Some((MD030Config::RULE_NAME.to_string(), toml::Value::Table(table)))
181            } else {
182                None
183            }
184        } else {
185            None
186        }
187    }
188
189    fn from_config(config: &crate::config::Config) -> Box<dyn Rule> {
190        let rule_config = crate::rule_config_serde::load_rule_config::<MD030Config>(config);
191        Box::new(Self::from_config_struct(rule_config))
192    }
193
194    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, crate::rule::LintError> {
195        let content = ctx.content;
196
197        // Early return if no fixes needed
198        if self.should_skip(ctx) {
199            return Ok(content.to_string());
200        }
201
202        // DocumentStructure is no longer used for optimization
203        let lines: Vec<&str> = content.lines().collect();
204        let mut result_lines = Vec::with_capacity(lines.len());
205
206        // Pre-compute which lines need potential fixes
207        let mut needs_check = vec![false; lines.len()];
208        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
209            if line_info.list_item.is_some() && !line_info.in_code_block {
210                needs_check[line_idx] = true;
211            }
212        }
213
214        for (line_idx, line) in lines.iter().enumerate() {
215            let line_num = line_idx + 1;
216
217            // Quick check: if this line doesn't need checking, just add it
218            if !needs_check[line_idx] {
219                result_lines.push(line.to_string());
220                continue;
221            }
222
223            // Skip if in front matter
224            // Note: Front matter checking is handled by LintContext directly
225            // No additional front matter check needed here
226
227            // Skip if this is an indented code block (4+ spaces with blank line before)
228            if self.is_indented_code_block(line, line_idx, &lines) {
229                result_lines.push(line.to_string());
230                continue;
231            }
232
233            // Try to fix list marker spacing (handles blockquotes internally)
234            let is_multi_line = self.is_multi_line_list_item(ctx, line_num, &lines);
235            if let Some(fixed_line) = self.try_fix_list_marker_spacing_with_context(line, is_multi_line) {
236                result_lines.push(fixed_line);
237            } else {
238                result_lines.push(line.to_string());
239            }
240        }
241
242        // Preserve trailing newline if original content had one
243        let result = result_lines.join("\n");
244        if content.ends_with('\n') && !result.ends_with('\n') {
245            Ok(result + "\n")
246        } else {
247            Ok(result)
248        }
249    }
250}
251
252impl MD030ListMarkerSpace {
253    /// Check if a list item is multi-line (spans multiple lines or contains nested content)
254    fn is_multi_line_list_item(&self, ctx: &crate::lint_context::LintContext, line_num: usize, lines: &[&str]) -> bool {
255        // Get the current list item info
256        let current_line_info = match ctx.line_info(line_num) {
257            Some(info) if info.list_item.is_some() => info,
258            _ => return false,
259        };
260
261        let current_list = current_line_info.list_item.as_ref().unwrap();
262
263        // Check subsequent lines to see if they are continuation of this list item
264        for next_line_num in (line_num + 1)..=lines.len() {
265            if let Some(next_line_info) = ctx.line_info(next_line_num) {
266                // If we encounter another list item at the same or higher level, this item is done
267                if let Some(next_list) = &next_line_info.list_item {
268                    if next_list.marker_column <= current_list.marker_column {
269                        break; // Found the next list item at same/higher level
270                    }
271                    // If there's a nested list item, this is multi-line
272                    return true;
273                }
274
275                // If we encounter a non-empty line that's not indented enough to be part of this list item,
276                // this list item is done
277                let line_content = lines.get(next_line_num - 1).unwrap_or(&"");
278                if !line_content.trim().is_empty() {
279                    let expected_continuation_indent = current_list.content_column;
280                    let actual_indent = line_content.len() - line_content.trim_start().len();
281
282                    if actual_indent < expected_continuation_indent {
283                        break; // Line is not indented enough to be part of this list item
284                    }
285
286                    // If we find a continuation line, this is multi-line
287                    if actual_indent >= expected_continuation_indent {
288                        return true;
289                    }
290                }
291
292                // Empty lines don't affect the multi-line status by themselves
293            }
294        }
295
296        false
297    }
298
299    /// Helper to fix marker spacing for both ordered and unordered lists
300    fn fix_marker_spacing(
301        &self,
302        marker: &str,
303        after_marker: &str,
304        indent: &str,
305        is_multi_line: bool,
306        is_ordered: bool,
307    ) -> Option<String> {
308        // MD030 only fixes multiple spaces, not tabs
309        // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
310        // Skip if the spacing starts with a tab
311        if after_marker.starts_with('\t') {
312            return None;
313        }
314
315        // Fix if there are multiple spaces
316        if after_marker.starts_with("  ") {
317            let content = after_marker.trim_start_matches(' ');
318            if !content.is_empty() {
319                // Use appropriate configuration based on list type and whether it's multi-line
320                let spaces = if is_ordered {
321                    if is_multi_line {
322                        " ".repeat(self.config.ol_multi.get())
323                    } else {
324                        " ".repeat(self.config.ol_single.get())
325                    }
326                } else if is_multi_line {
327                    " ".repeat(self.config.ul_multi.get())
328                } else {
329                    " ".repeat(self.config.ul_single.get())
330                };
331                return Some(format!("{indent}{marker}{spaces}{content}"));
332            }
333        }
334        None
335    }
336
337    /// Fix list marker spacing with context - handles tabs, multiple spaces, and mixed whitespace
338    fn try_fix_list_marker_spacing_with_context(&self, line: &str, is_multi_line: bool) -> Option<String> {
339        // Extract blockquote prefix if present
340        let (blockquote_prefix, content) = Self::strip_blockquote_prefix(line);
341
342        let trimmed = content.trim_start();
343        let indent = &content[..content.len() - trimmed.len()];
344
345        // Check for unordered list markers
346        for marker in &["*", "-", "+"] {
347            if let Some(after_marker) = trimmed.strip_prefix(marker) {
348                if let Some(fixed) = self.fix_marker_spacing(marker, after_marker, indent, is_multi_line, false) {
349                    return Some(format!("{blockquote_prefix}{fixed}"));
350                }
351                break; // Found a marker, don't check others
352            }
353        }
354
355        // Check for ordered list markers
356        if let Some(dot_pos) = trimmed.find('.') {
357            let before_dot = &trimmed[..dot_pos];
358            if before_dot.chars().all(|c| c.is_ascii_digit()) && !before_dot.is_empty() {
359                let after_dot = &trimmed[dot_pos + 1..];
360                let marker = format!("{before_dot}.");
361                if let Some(fixed) = self.fix_marker_spacing(&marker, after_dot, indent, is_multi_line, true) {
362                    return Some(format!("{blockquote_prefix}{fixed}"));
363                }
364            }
365        }
366
367        None
368    }
369
370    /// Strip blockquote prefix from a line, returning (prefix, content)
371    fn strip_blockquote_prefix(line: &str) -> (String, &str) {
372        let mut prefix = String::new();
373        let mut remaining = line;
374
375        loop {
376            let trimmed = remaining.trim_start();
377            if !trimmed.starts_with('>') {
378                break;
379            }
380            // Add leading spaces to prefix
381            let leading_spaces = remaining.len() - trimmed.len();
382            prefix.push_str(&remaining[..leading_spaces]);
383            prefix.push('>');
384            remaining = &trimmed[1..];
385
386            // Handle optional space after >
387            if remaining.starts_with(' ') {
388                prefix.push(' ');
389                remaining = &remaining[1..];
390            }
391        }
392
393        (prefix, remaining)
394    }
395
396    /// Fix list marker spacing - handles tabs, multiple spaces, and mixed whitespace
397    /// (Legacy method for backward compatibility - defaults to single-line behavior)
398    /// Check if a line is part of an indented code block (4+ columns with blank line before)
399    fn is_indented_code_block(&self, line: &str, line_idx: usize, lines: &[&str]) -> bool {
400        // Must have 4+ columns of indentation (accounting for tab expansion)
401        if ElementCache::calculate_indentation_width_default(line) < 4 {
402            return false;
403        }
404
405        // If it's the first line, it's not an indented code block
406        if line_idx == 0 {
407            return false;
408        }
409
410        // Check if there's a blank line before this line or before the start of the indented block
411        if self.has_blank_line_before_indented_block(line_idx, lines) {
412            return true;
413        }
414
415        false
416    }
417
418    /// Check if there's a blank line before the start of an indented block
419    fn has_blank_line_before_indented_block(&self, line_idx: usize, lines: &[&str]) -> bool {
420        // Walk backwards to find the start of the indented block
421        let mut current_idx = line_idx;
422
423        // Find the first line in this indented block
424        while current_idx > 0 {
425            let current_line = lines[current_idx];
426            let prev_line = lines[current_idx - 1];
427
428            // If current line is not indented (< 4 columns), we've gone too far
429            if ElementCache::calculate_indentation_width_default(current_line) < 4 {
430                break;
431            }
432
433            // If previous line is not indented, check if it's blank
434            if ElementCache::calculate_indentation_width_default(prev_line) < 4 {
435                return prev_line.trim().is_empty();
436            }
437
438            current_idx -= 1;
439        }
440
441        false
442    }
443}
444
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Default configuration: correctly spaced lists pass, over-spaced markers are flagged.
    #[test]
    fn test_basic_functionality() {
        let rule = MD030ListMarkerSpace::default();

        // Lints `content` with the standard flavor and returns the warnings.
        let lint = |content: &str| {
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
            rule.check(&ctx).unwrap()
        };

        let clean = lint("* Item 1\n* Item 2\n  * Nested item\n1. Ordered item");
        assert!(
            clean.is_empty(),
            "Correctly spaced list markers should not generate warnings"
        );

        // Both the unordered and the ordered item carry extra spaces after the marker.
        let flagged = lint("*  Item 1 (too many spaces)\n* Item 2\n1.   Ordered item (too many spaces)");
        assert_eq!(
            flagged.len(),
            2,
            "Should flag lines with too many spaces after list marker"
        );

        for warning in &flagged {
            let has_expected = warning.message.starts_with("Spaces after list markers (Expected:");
            let has_actual = warning.message.contains("Actual:");
            assert!(
                has_expected && has_actual,
                "Warning message should include expected and actual values, got: '{}'",
                warning.message
            );
        }
    }
}