rumdl_lib/rules/
md009_trailing_spaces.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::{LineIndex, calculate_trailing_range};
4use crate::utils::regex_cache::{ORDERED_LIST_MARKER_REGEX, UNORDERED_LIST_MARKER_REGEX, get_cached_regex};
5
6mod md009_config;
7use md009_config::MD009Config;
8
// Regexes are obtained from the shared cache via `get_cached_regex`, so no `lazy_static!` block is needed here.
10
11#[derive(Debug, Clone, Default)]
12pub struct MD009TrailingSpaces {
13    config: MD009Config,
14}
15
16impl MD009TrailingSpaces {
17    pub fn new(br_spaces: usize, strict: bool) -> Self {
18        Self {
19            config: MD009Config {
20                br_spaces,
21                strict,
22                list_item_empty_lines: false,
23            },
24        }
25    }
26
27    pub fn from_config_struct(config: MD009Config) -> Self {
28        Self { config }
29    }
30
31    fn is_empty_blockquote_line(line: &str) -> bool {
32        let trimmed = line.trim_start();
33        trimmed.starts_with('>') && trimmed.trim_end() == ">"
34    }
35
36    fn count_trailing_spaces(line: &str) -> usize {
37        line.chars().rev().take_while(|&c| c == ' ').count()
38    }
39
40    fn is_empty_list_item_line(line: &str, prev_line: Option<&str>) -> bool {
41        // A line is an empty list item line if:
42        // 1. It's blank or only contains spaces
43        // 2. The previous line is a list item
44        if !line.trim().is_empty() {
45            return false;
46        }
47
48        if let Some(prev) = prev_line {
49            // Check for unordered list markers (*, -, +) with proper formatting
50            UNORDERED_LIST_MARKER_REGEX.is_match(prev) || ORDERED_LIST_MARKER_REGEX.is_match(prev)
51        } else {
52            false
53        }
54    }
55}
56
57impl Rule for MD009TrailingSpaces {
58    fn name(&self) -> &'static str {
59        "MD009"
60    }
61
62    fn description(&self) -> &'static str {
63        "Trailing spaces should be removed"
64    }
65
66    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
67        let content = ctx.content;
68        let _line_index = LineIndex::new(content.to_string());
69
70        let mut warnings = Vec::new();
71
72        let lines: Vec<&str> = content.lines().collect();
73
74        for (line_num, &line) in lines.iter().enumerate() {
75            let trailing_spaces = Self::count_trailing_spaces(line);
76
77            // Skip if no trailing spaces
78            if trailing_spaces == 0 {
79                continue;
80            }
81
82            // Handle empty lines
83            if line.trim().is_empty() {
84                if trailing_spaces > 0 {
85                    // Check if this is an empty list item line and config allows it
86                    let prev_line = if line_num > 0 { Some(lines[line_num - 1]) } else { None };
87                    if self.config.list_item_empty_lines && Self::is_empty_list_item_line(line, prev_line) {
88                        continue;
89                    }
90
91                    // Calculate precise character range for all trailing spaces on empty line
92                    let (start_line, start_col, end_line, end_col) = calculate_trailing_range(line_num + 1, line, 0);
93
94                    warnings.push(LintWarning {
95                        rule_name: Some(self.name()),
96                        line: start_line,
97                        column: start_col,
98                        end_line,
99                        end_column: end_col,
100                        message: "Empty line has trailing spaces".to_string(),
101                        severity: Severity::Warning,
102                        fix: Some(Fix {
103                            range: _line_index.line_col_to_byte_range_with_length(line_num + 1, 1, line.len()),
104                            replacement: String::new(),
105                        }),
106                    });
107                }
108                continue;
109            }
110
111            // Handle code blocks if not in strict mode
112            if !self.config.strict {
113                // Use pre-computed line info
114                if let Some(line_info) = ctx.line_info(line_num + 1)
115                    && line_info.in_code_block
116                {
117                    continue;
118                }
119            }
120
121            // Check if it's a valid line break
122            // Special handling: if the content ends with a newline, the last line from .lines()
123            // is not really the "last line" in terms of trailing spaces rules
124            let is_truly_last_line = line_num == lines.len() - 1 && !content.ends_with('\n');
125            if !self.config.strict && !is_truly_last_line && trailing_spaces == self.config.br_spaces {
126                continue;
127            }
128
129            // Special handling for empty blockquote lines
130            if Self::is_empty_blockquote_line(line) {
131                let trimmed = line.trim_end();
132                // Calculate precise character range for trailing spaces after blockquote marker
133                let (start_line, start_col, end_line, end_col) =
134                    calculate_trailing_range(line_num + 1, line, trimmed.len());
135
136                warnings.push(LintWarning {
137                    rule_name: Some(self.name()),
138                    line: start_line,
139                    column: start_col,
140                    end_line,
141                    end_column: end_col,
142                    message: "Empty blockquote line needs a space after >".to_string(),
143                    severity: Severity::Warning,
144                    fix: Some(Fix {
145                        range: _line_index.line_col_to_byte_range_with_length(
146                            line_num + 1,
147                            trimmed.len() + 1,
148                            line.len() - trimmed.len(),
149                        ),
150                        replacement: " ".to_string(),
151                    }),
152                });
153                continue;
154            }
155
156            let trimmed = line.trim_end();
157            // Calculate precise character range for all trailing spaces
158            let (start_line, start_col, end_line, end_col) =
159                calculate_trailing_range(line_num + 1, line, trimmed.len());
160
161            warnings.push(LintWarning {
162                rule_name: Some(self.name()),
163                line: start_line,
164                column: start_col,
165                end_line,
166                end_column: end_col,
167                message: if trailing_spaces == 1 {
168                    "Trailing space found".to_string()
169                } else {
170                    format!("{trailing_spaces} trailing spaces found")
171                },
172                severity: Severity::Warning,
173                fix: Some(Fix {
174                    range: _line_index.line_col_to_byte_range_with_length(
175                        line_num + 1,
176                        trimmed.len() + 1,
177                        trailing_spaces,
178                    ),
179                    replacement: if !self.config.strict && !is_truly_last_line && trailing_spaces > 0 {
180                        " ".repeat(self.config.br_spaces)
181                    } else {
182                        String::new()
183                    },
184                }),
185            });
186        }
187
188        Ok(warnings)
189    }
190
191    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
192        let content = ctx.content;
193
194        // For simple cases (strict mode), use fast regex approach
195        if self.config.strict {
196            // In strict mode, remove ALL trailing spaces everywhere
197            return Ok(get_cached_regex(r"(?m) +$")
198                .unwrap()
199                .replace_all(content, "")
200                .to_string());
201        }
202
203        // For complex cases, we need line-by-line processing but with optimizations
204        let lines: Vec<&str> = content.lines().collect();
205        let mut result = String::with_capacity(content.len()); // Pre-allocate capacity
206
207        for (i, line) in lines.iter().enumerate() {
208            // Fast path: if no trailing spaces, just add the line
209            if !line.ends_with(' ') {
210                result.push_str(line);
211                result.push('\n');
212                continue;
213            }
214
215            let trimmed = line.trim_end();
216            let trailing_spaces = Self::count_trailing_spaces(line);
217
218            // Handle empty lines - fast regex replacement
219            if trimmed.is_empty() {
220                // Check if this is an empty list item line and config allows it
221                let prev_line = if i > 0 { Some(lines[i - 1]) } else { None };
222                if self.config.list_item_empty_lines && Self::is_empty_list_item_line(line, prev_line) {
223                    result.push_str(line);
224                } else {
225                    // Remove all trailing spaces
226                }
227                result.push('\n');
228                continue;
229            }
230
231            // Handle code blocks if not in strict mode
232            if let Some(line_info) = ctx.line_info(i + 1)
233                && line_info.in_code_block
234            {
235                result.push_str(line);
236                result.push('\n');
237                continue;
238            }
239
240            // Special handling for empty blockquote lines
241            if Self::is_empty_blockquote_line(line) {
242                result.push_str(trimmed);
243                result.push(' '); // Add a space after the blockquote marker
244                result.push('\n');
245                continue;
246            }
247
248            // Handle lines with trailing spaces
249            let is_truly_last_line = i == lines.len() - 1 && !content.ends_with('\n');
250
251            result.push_str(trimmed);
252
253            // Check if this line is a heading - headings should never have trailing spaces
254            let is_heading = if let Some(line_info) = ctx.line_info(i + 1) {
255                line_info.heading.is_some()
256            } else {
257                // Fallback: check if line starts with #
258                trimmed.starts_with('#')
259            };
260
261            // In non-strict mode, preserve line breaks by normalizing to br_spaces
262            // BUT: Never preserve trailing spaces in headings as they serve no purpose
263            if !self.config.strict && !is_truly_last_line && trailing_spaces > 0 && !is_heading {
264                // Optimize for common case of 2 spaces
265                match self.config.br_spaces {
266                    0 => {}
267                    1 => result.push(' '),
268                    2 => result.push_str("  "),
269                    n => result.push_str(&" ".repeat(n)),
270                }
271            }
272            result.push('\n');
273        }
274
275        // Preserve original ending (with or without final newline)
276        if !content.ends_with('\n') && result.ends_with('\n') {
277            result.pop();
278        }
279
280        Ok(result)
281    }
282
283    fn as_any(&self) -> &dyn std::any::Any {
284        self
285    }
286
287    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
288        // Skip if content is empty or has no spaces at all
289        ctx.content.is_empty() || !ctx.content.contains(' ')
290    }
291
292    fn category(&self) -> RuleCategory {
293        RuleCategory::Whitespace
294    }
295
296    fn default_config_section(&self) -> Option<(String, toml::Value)> {
297        let default_config = MD009Config::default();
298        let json_value = serde_json::to_value(&default_config).ok()?;
299        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
300
301        if let toml::Value::Table(table) = toml_value {
302            if !table.is_empty() {
303                Some((MD009Config::RULE_NAME.to_string(), toml::Value::Table(table)))
304            } else {
305                None
306            }
307        } else {
308            None
309        }
310    }
311
312    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
313    where
314        Self: Sized,
315    {
316        let rule_config = crate::rule_config_serde::load_rule_config::<MD009Config>(config);
317        Box::new(Self::from_config_struct(rule_config))
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::rule::Rule;

    /// Runs `check` on `content` (standard flavor) and returns the warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` (standard flavor) and returns the repaired text.
    fn apply_fix(rule: &dyn Rule, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_no_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "This is a line\nAnother line\nNo trailing spaces");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_basic_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "Line with spaces   \nAnother line  \nClean line");
        // With the default br_spaces of 2, the two-space line is a valid break.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].message, "3 trailing spaces found");
    }

    #[test]
    fn test_fix_basic_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let fixed = apply_fix(&rule, "Line with spaces   \nAnother line  \nClean line");
        assert_eq!(fixed, "Line with spaces  \nAnother line  \nClean line");
    }

    #[test]
    fn test_strict_mode() {
        let rule = MD009TrailingSpaces::new(2, true);
        let input = "Line with spaces  \nCode block:  \n```  \nCode with spaces  \n```  ";

        // Strict mode reports every line that ends in spaces.
        assert_eq!(lint(&rule, input).len(), 5);

        assert_eq!(
            apply_fix(&rule, input),
            "Line with spaces\nCode block:\n```\nCode with spaces\n```"
        );
    }

    #[test]
    fn test_non_strict_mode_with_code_blocks() {
        let rule = MD009TrailingSpaces::new(2, false);
        let warnings = lint(
            &rule,
            "Line with spaces  \n```\nCode with spaces  \n```\nOutside code  ",
        );
        // Code-block lines are exempt, line 1 is a valid two-space break, and
        // line 5 is the unterminated last line, so only line 5 is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5);
    }

    #[test]
    fn test_br_spaces_preservation() {
        let rule = MD009TrailingSpaces::new(2, false);
        let input = "Line with two spaces  \nLine with three spaces   \nLine with one space ";

        let warnings = lint(&rule, input);
        // Line 1 matches br_spaces exactly; line 2 has one space too many;
        // line 3 is the unterminated last line.
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[1].line, 3);

        // Line 2 is normalised to two spaces, line 3 loses its space.
        assert_eq!(
            apply_fix(&rule, input),
            "Line with two spaces  \nLine with three spaces  \nLine with one space"
        );
    }

    #[test]
    fn test_empty_lines_with_spaces() {
        let rule = MD009TrailingSpaces::default();
        let input = "Normal line\n   \n  \nAnother line";

        let warnings = lint(&rule, input);
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].message, "Empty line has trailing spaces");
        assert_eq!(warnings[1].message, "Empty line has trailing spaces");

        assert_eq!(apply_fix(&rule, input), "Normal line\n\n\nAnother line");
    }

    #[test]
    fn test_empty_blockquote_lines() {
        let rule = MD009TrailingSpaces::default();
        let input = "> Quote\n>   \n> More quote";

        let warnings = lint(&rule, input);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[0].message, "Empty blockquote line needs a space after >");

        assert_eq!(apply_fix(&rule, input), "> Quote\n> \n> More quote");
    }

    #[test]
    fn test_last_line_handling() {
        let rule = MD009TrailingSpaces::new(2, false);

        // No final newline: the last line must lose its trailing spaces.
        let unterminated = "First line  \nLast line  ";
        let warnings = lint(&rule, unterminated);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(apply_fix(&rule, unterminated), "First line  \nLast line");

        // With a final newline both lines are valid two-space breaks.
        let terminated = "First line  \nLast line  \n";
        assert!(lint(&rule, terminated).is_empty());
    }

    #[test]
    fn test_single_trailing_space() {
        let rule = MD009TrailingSpaces::new(2, false);
        let warnings = lint(&rule, "Line with one space ");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Trailing space found");
    }

    #[test]
    fn test_tabs_not_spaces() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "Line with tab\t\nLine with spaces  ");
        // A trailing tab is not a trailing space; only line 2 is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_mixed_content() {
        let rule = MD009TrailingSpaces::new(2, false);
        // The heading carries three trailing spaces, one more than br_spaces.
        let content = [
            "# Heading",
            "   ",
            "\n",
            "Normal paragraph\n> Blockquote\n>\n```\nCode block\n```\n- List item\n",
        ]
        .concat();

        let warnings = lint(&rule, &content);
        // Only the heading line ends in spaces.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("trailing spaces"));
    }

    #[test]
    fn test_column_positions() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "Text   ");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 5); // first column after "Text"
        assert_eq!(warnings[0].end_column, 8); // one past the last space
    }

    #[test]
    fn test_default_config() {
        let rule = MD009TrailingSpaces::default();
        let section = rule.default_config_section();
        assert!(section.is_some());
        let (name, _value) = section.unwrap();
        assert_eq!(name, "MD009");
    }

    #[test]
    fn test_from_config() {
        let mut settings = crate::config::RuleConfig::default();
        settings
            .values
            .insert("br_spaces".to_string(), toml::Value::Integer(3));
        settings
            .values
            .insert("strict".to_string(), toml::Value::Boolean(true));

        let mut config = crate::config::Config::default();
        config.rules.insert("MD009".to_string(), settings);

        let rule = MD009TrailingSpaces::from_config(&config);
        let input = "Line   ";
        assert_eq!(lint(&*rule, input).len(), 1);

        // Strict mode removes every trailing space.
        assert_eq!(apply_fix(&*rule, input), "Line");
    }

    #[test]
    fn test_list_item_empty_lines() {
        let rule = MD009TrailingSpaces::from_config_struct(MD009Config {
            list_item_empty_lines: true,
            ..Default::default()
        });

        // Blank line with spaces after an unordered list item is tolerated.
        assert!(lint(&rule, "- First item\n  \n- Second item").is_empty());

        // Same after an ordered list item.
        assert!(lint(&rule, "1. First item\n  \n2. Second item").is_empty());

        // A blank line that does not follow a list item is still reported.
        let warnings = lint(&rule, "Normal paragraph\n  \nAnother paragraph");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_list_item_empty_lines_disabled() {
        // The default configuration leaves list_item_empty_lines off.
        let rule = MD009TrailingSpaces::default();

        let warnings = lint(&rule, "- First item\n  \n- Second item");
        // The blank line with spaces is therefore reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_performance_large_document() {
        let rule = MD009TrailingSpaces::default();
        let content: String = (0..1000).map(|i| format!("Line {i} with spaces  \n")).collect();

        // Every line is a valid two-space break under the default br_spaces.
        assert_eq!(lint(&rule, &content).len(), 0);
    }

    #[test]
    fn test_preserve_content_after_fix() {
        let rule = MD009TrailingSpaces::new(2, false);
        let fixed = apply_fix(&rule, "**Bold** text  \n*Italic* text  \n[Link](url)  ");
        assert_eq!(fixed, "**Bold** text  \n*Italic* text  \n[Link](url)");
    }

    #[test]
    fn test_nested_blockquotes() {
        let rule = MD009TrailingSpaces::default();
        let input = "> > Nested  \n> >   \n> Normal  ";

        let warnings = lint(&rule, input);
        // Line 2 has three trailing spaces; line 3 is the unterminated last line.
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[1].line, 3);

        // Line 2 is normalised to two spaces, line 3 loses its spaces.
        assert_eq!(apply_fix(&rule, input), "> > Nested  \n> >  \n> Normal");
    }

    #[test]
    fn test_windows_line_endings() {
        let rule = MD009TrailingSpaces::default();
        // The first line is terminated by CRLF.
        let warnings = lint(&rule, "Line with spaces  \r\nAnother line  ");
        // Line 1 is a valid two-space break; line 2 is the unterminated last line.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }
}