rumdl_lib/rules/
md009_trailing_spaces.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::{LineIndex, calculate_trailing_range};
4use crate::utils::regex_cache::{ORDERED_LIST_MARKER_REGEX, UNORDERED_LIST_MARKER_REGEX, get_cached_regex};
5
6mod md009_config;
7use md009_config::MD009Config;
8
9// No need for lazy_static, we'll use get_cached_regex directly
10
/// MD009: detects (and can fix) trailing spaces at the end of lines.
///
/// All behaviour is driven by the wrapped [`MD009Config`].
#[derive(Debug, Clone, Default)]
pub struct MD009TrailingSpaces {
    /// Rule settings (`br_spaces`, `strict`, `list_item_empty_lines`).
    config: MD009Config,
}
15
16impl MD009TrailingSpaces {
17    pub fn new(br_spaces: usize, strict: bool) -> Self {
18        Self {
19            config: MD009Config {
20                br_spaces,
21                strict,
22                list_item_empty_lines: false,
23            },
24        }
25    }
26
    /// Creates the rule from an already populated [`MD009Config`].
    pub fn from_config_struct(config: MD009Config) -> Self {
        Self { config }
    }
30
31    fn count_trailing_spaces(line: &str) -> usize {
32        line.chars().rev().take_while(|&c| c == ' ').count()
33    }
34
35    fn is_empty_list_item_line(line: &str, prev_line: Option<&str>) -> bool {
36        // A line is an empty list item line if:
37        // 1. It's blank or only contains spaces
38        // 2. The previous line is a list item
39        if !line.trim().is_empty() {
40            return false;
41        }
42
43        if let Some(prev) = prev_line {
44            // Check for unordered list markers (*, -, +) with proper formatting
45            UNORDERED_LIST_MARKER_REGEX.is_match(prev) || ORDERED_LIST_MARKER_REGEX.is_match(prev)
46        } else {
47            false
48        }
49    }
50}
51
52impl Rule for MD009TrailingSpaces {
    /// Returns the rule identifier, `"MD009"`.
    fn name(&self) -> &'static str {
        "MD009"
    }
56
    /// Returns the one-line, human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Trailing spaces should be removed"
    }
60
61    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
62        let content = ctx.content;
63        let _line_index = LineIndex::new(content.to_string());
64
65        let mut warnings = Vec::new();
66
67        let lines: Vec<&str> = content.lines().collect();
68
69        for (line_num, &line) in lines.iter().enumerate() {
70            let trailing_spaces = Self::count_trailing_spaces(line);
71
72            // Skip if no trailing spaces
73            if trailing_spaces == 0 {
74                continue;
75            }
76
77            // Handle empty lines
78            if line.trim().is_empty() {
79                if trailing_spaces > 0 {
80                    // Check if this is an empty list item line and config allows it
81                    let prev_line = if line_num > 0 { Some(lines[line_num - 1]) } else { None };
82                    if self.config.list_item_empty_lines && Self::is_empty_list_item_line(line, prev_line) {
83                        continue;
84                    }
85
86                    // Check if this is an empty blockquote line (like "> " or ">> ")
87                    // These are allowed to have a single trailing space by MD028
88                    let trimmed_line = line.trim_end();
89                    if trimmed_line.chars().all(|c| c == '>' || c == ' ' || c == '\t')
90                        && trimmed_line.contains('>')
91                        && trailing_spaces == 1
92                    {
93                        // This is an empty blockquote line with single trailing space - allowed
94                        continue;
95                    }
96
97                    // Calculate precise character range for all trailing spaces on empty line
98                    let (start_line, start_col, end_line, end_col) = calculate_trailing_range(line_num + 1, line, 0);
99
100                    warnings.push(LintWarning {
101                        rule_name: Some(self.name()),
102                        line: start_line,
103                        column: start_col,
104                        end_line,
105                        end_column: end_col,
106                        message: "Empty line has trailing spaces".to_string(),
107                        severity: Severity::Warning,
108                        fix: Some(Fix {
109                            range: _line_index.line_col_to_byte_range_with_length(line_num + 1, 1, line.len()),
110                            replacement: String::new(),
111                        }),
112                    });
113                }
114                continue;
115            }
116
117            // Handle code blocks if not in strict mode
118            if !self.config.strict {
119                // Use pre-computed line info
120                if let Some(line_info) = ctx.line_info(line_num + 1)
121                    && line_info.in_code_block
122                {
123                    continue;
124                }
125            }
126
127            // Check if it's a valid line break
128            // Special handling: if the content ends with a newline, the last line from .lines()
129            // is not really the "last line" in terms of trailing spaces rules
130            let is_truly_last_line = line_num == lines.len() - 1 && !content.ends_with('\n');
131            if !self.config.strict && !is_truly_last_line && trailing_spaces == self.config.br_spaces {
132                continue;
133            }
134
135            // Check if this is an empty blockquote line ("> " or ">> " etc)
136            // These are allowed by MD028 to have a single trailing space
137            let trimmed = line.trim_end();
138            let is_empty_blockquote_with_space = trimmed.chars().all(|c| c == '>' || c == ' ' || c == '\t')
139                && trimmed.contains('>')
140                && trailing_spaces == 1;
141
142            if is_empty_blockquote_with_space {
143                continue; // Allow single trailing space for empty blockquote lines
144            }
145            // Calculate precise character range for all trailing spaces
146            let (start_line, start_col, end_line, end_col) =
147                calculate_trailing_range(line_num + 1, line, trimmed.len());
148
149            warnings.push(LintWarning {
150                rule_name: Some(self.name()),
151                line: start_line,
152                column: start_col,
153                end_line,
154                end_column: end_col,
155                message: if trailing_spaces == 1 {
156                    "Trailing space found".to_string()
157                } else {
158                    format!("{trailing_spaces} trailing spaces found")
159                },
160                severity: Severity::Warning,
161                fix: Some(Fix {
162                    range: _line_index.line_col_to_byte_range_with_length(
163                        line_num + 1,
164                        trimmed.len() + 1,
165                        trailing_spaces,
166                    ),
167                    replacement: if !self.config.strict && !is_truly_last_line && trailing_spaces > 0 {
168                        " ".repeat(self.config.br_spaces)
169                    } else {
170                        String::new()
171                    },
172                }),
173            });
174        }
175
176        Ok(warnings)
177    }
178
179    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
180        let content = ctx.content;
181
182        // For simple cases (strict mode), use fast regex approach
183        if self.config.strict {
184            // In strict mode, remove ALL trailing spaces everywhere
185            return Ok(get_cached_regex(r"(?m) +$")
186                .unwrap()
187                .replace_all(content, "")
188                .to_string());
189        }
190
191        // For complex cases, we need line-by-line processing but with optimizations
192        let lines: Vec<&str> = content.lines().collect();
193        let mut result = String::with_capacity(content.len()); // Pre-allocate capacity
194
195        for (i, line) in lines.iter().enumerate() {
196            // Fast path: if no trailing spaces, just add the line
197            if !line.ends_with(' ') {
198                result.push_str(line);
199                result.push('\n');
200                continue;
201            }
202
203            let trimmed = line.trim_end();
204            let trailing_spaces = Self::count_trailing_spaces(line);
205
206            // Handle empty lines - fast regex replacement
207            if trimmed.is_empty() {
208                // Check if this is an empty list item line and config allows it
209                let prev_line = if i > 0 { Some(lines[i - 1]) } else { None };
210                if self.config.list_item_empty_lines && Self::is_empty_list_item_line(line, prev_line) {
211                    result.push_str(line);
212                } else {
213                    // Remove all trailing spaces - line is empty so don't add anything
214                }
215                result.push('\n');
216                continue;
217            }
218
219            // Handle code blocks if not in strict mode
220            if let Some(line_info) = ctx.line_info(i + 1)
221                && line_info.in_code_block
222            {
223                result.push_str(line);
224                result.push('\n');
225                continue;
226            }
227
228            // No special handling for empty blockquote lines - treat them like regular lines
229
230            // Handle lines with trailing spaces
231            let is_truly_last_line = i == lines.len() - 1 && !content.ends_with('\n');
232
233            result.push_str(trimmed);
234
235            // Check if this line is a heading - headings should never have trailing spaces
236            let is_heading = if let Some(line_info) = ctx.line_info(i + 1) {
237                line_info.heading.is_some()
238            } else {
239                // Fallback: check if line starts with #
240                trimmed.starts_with('#')
241            };
242
243            // Check if this is an empty blockquote line (just ">")
244            let is_empty_blockquote = if let Some(line_info) = ctx.line_info(i + 1) {
245                line_info.blockquote.as_ref().is_some_and(|bq| bq.content.is_empty())
246            } else {
247                false
248            };
249
250            // In non-strict mode, preserve line breaks by normalizing to br_spaces
251            // BUT: Never preserve trailing spaces in headings or empty blockquotes as they serve no purpose
252            if !self.config.strict && !is_truly_last_line && trailing_spaces > 0 && !is_heading && !is_empty_blockquote
253            {
254                // Optimize for common case of 2 spaces
255                match self.config.br_spaces {
256                    0 => {}
257                    1 => result.push(' '),
258                    2 => result.push_str("  "),
259                    n => result.push_str(&" ".repeat(n)),
260                }
261            }
262            result.push('\n');
263        }
264
265        // Preserve original ending (with or without final newline)
266        if !content.ends_with('\n') && result.ends_with('\n') {
267            result.pop();
268        }
269
270        Ok(result)
271    }
272
    /// Exposes the rule as [`std::any::Any`].
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
276
277    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
278        // Skip if content is empty or has no spaces at all
279        ctx.content.is_empty() || !ctx.content.contains(' ')
280    }
281
    /// Returns the category this rule belongs to: whitespace.
    fn category(&self) -> RuleCategory {
        RuleCategory::Whitespace
    }
285
286    fn default_config_section(&self) -> Option<(String, toml::Value)> {
287        let default_config = MD009Config::default();
288        let json_value = serde_json::to_value(&default_config).ok()?;
289        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
290
291        if let toml::Value::Table(table) = toml_value {
292            if !table.is_empty() {
293                Some((MD009Config::RULE_NAME.to_string(), toml::Value::Table(table)))
294            } else {
295                None
296            }
297        } else {
298            None
299        }
300    }
301
302    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
303    where
304        Self: Sized,
305    {
306        let rule_config = crate::rule_config_serde::load_rule_config::<MD009Config>(config);
307        Box::new(Self::from_config_struct(rule_config))
308    }
309}
310
// Unit tests for MD009. Many string literals below contain significant trailing spaces.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::rule::Rule;

    /// Content without any trailing spaces yields no warnings.
    #[test]
    fn test_no_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let content = "This is a line\nAnother line\nNo trailing spaces";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty());
    }

    /// Three trailing spaces are flagged; exactly two (the default `br_spaces`) are not.
    #[test]
    fn test_basic_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let content = "Line with spaces   \nAnother line  \nClean line";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Default br_spaces=2, so line with 2 spaces is OK
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].message, "3 trailing spaces found");
    }

    /// The fix normalizes three trailing spaces down to the two-space line break.
    #[test]
    fn test_fix_basic_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let content = "Line with spaces   \nAnother line  \nClean line";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "Line with spaces  \nAnother line  \nClean line");
    }

    /// Strict mode flags and removes every trailing space, including inside code blocks.
    #[test]
    fn test_strict_mode() {
        let rule = MD009TrailingSpaces::new(2, true);
        let content = "Line with spaces  \nCode block:  \n```  \nCode with spaces  \n```  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // In strict mode, all trailing spaces are flagged
        assert_eq!(result.len(), 5);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "Line with spaces\nCode block:\n```\nCode with spaces\n```");
    }

    /// Non-strict mode ignores trailing spaces inside code blocks.
    #[test]
    fn test_non_strict_mode_with_code_blocks() {
        let rule = MD009TrailingSpaces::new(2, false);
        let content = "Line with spaces  \n```\nCode with spaces  \n```\nOutside code  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // In non-strict mode, code blocks are not checked
        // Line 1 has 2 spaces (= br_spaces), so it's OK
        // Line 5 is last line without newline, so trailing spaces are flagged
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    /// Lines with exactly `br_spaces` spaces are kept; other counts are flagged and fixed.
    #[test]
    fn test_br_spaces_preservation() {
        let rule = MD009TrailingSpaces::new(2, false);
        let content = "Line with two spaces  \nLine with three spaces   \nLine with one space ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // br_spaces=2, so lines with exactly 2 spaces are OK
        // Line 2 has 3 spaces (will be normalized to 2)
        // Line 3 has 1 space and is last line without newline (will be removed)
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 2);
        assert_eq!(result[1].line, 3);

        let fixed = rule.fix(&ctx).unwrap();
        // Line 1: keeps 2 spaces
        // Line 2: normalized from 3 to 2 spaces
        // Line 3: last line without newline, spaces removed
        assert_eq!(
            fixed,
            "Line with two spaces  \nLine with three spaces  \nLine with one space"
        );
    }

    /// Whitespace-only lines are flagged with a dedicated message and emptied by the fix.
    #[test]
    fn test_empty_lines_with_spaces() {
        let rule = MD009TrailingSpaces::default();
        let content = "Normal line\n   \n  \nAnother line";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].message, "Empty line has trailing spaces");
        assert_eq!(result[1].message, "Empty line has trailing spaces");

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "Normal line\n\n\nAnother line");
    }

    /// An empty blockquote line with three trailing spaces is flagged and fully stripped.
    #[test]
    fn test_empty_blockquote_lines() {
        let rule = MD009TrailingSpaces::default();
        let content = "> Quote\n>   \n> More quote";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
        assert_eq!(result[0].message, "3 trailing spaces found");

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "> Quote\n>\n> More quote"); // All trailing spaces removed
    }

    /// A final line only keeps its line-break spaces when a newline follows it.
    #[test]
    fn test_last_line_handling() {
        let rule = MD009TrailingSpaces::new(2, false);

        // Content without final newline
        let content = "First line  \nLast line  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Last line without newline should have trailing spaces removed
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "First line  \nLast line");

        // Content with final newline
        let content_with_newline = "First line  \nLast line  \n";
        let ctx = LintContext::new(content_with_newline, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Both lines should preserve br_spaces
        assert!(result.is_empty());
    }

    /// A single trailing space uses the singular warning message.
    #[test]
    fn test_single_trailing_space() {
        let rule = MD009TrailingSpaces::new(2, false);
        let content = "Line with one space ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Trailing space found");
    }

    /// Trailing tabs are not reported by this rule.
    #[test]
    fn test_tabs_not_spaces() {
        let rule = MD009TrailingSpaces::default();
        let content = "Line with tab\t\nLine with spaces  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Only spaces are checked, not tabs
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    /// In a document mixing several block types, only the heading with three trailing spaces is flagged.
    #[test]
    fn test_mixed_content() {
        let rule = MD009TrailingSpaces::new(2, false);
        // Construct content with actual trailing spaces using string concatenation
        let mut content = String::new();
        content.push_str("# Heading");
        content.push_str("   "); // Add 3 trailing spaces (more than br_spaces=2)
        content.push('\n');
        content.push_str("Normal paragraph\n> Blockquote\n>\n```\nCode block\n```\n- List item\n");

        let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Should flag the line with trailing spaces
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("trailing spaces"));
    }

    /// The warning range covers exactly the run of trailing spaces (1-based columns).
    #[test]
    fn test_column_positions() {
        let rule = MD009TrailingSpaces::default();
        let content = "Text   ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 5); // After "Text"
        assert_eq!(result[0].end_column, 8); // After all spaces
    }

    /// The default configuration section exists and is keyed by the rule name.
    #[test]
    fn test_default_config() {
        let rule = MD009TrailingSpaces::default();
        let config = rule.default_config_section();
        assert!(config.is_some());
        let (name, _value) = config.unwrap();
        assert_eq!(name, "MD009");
    }

    /// Settings supplied through the global configuration are picked up by `from_config`.
    #[test]
    fn test_from_config() {
        let mut config = crate::config::Config::default();
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("br_spaces".to_string(), toml::Value::Integer(3));
        rule_config
            .values
            .insert("strict".to_string(), toml::Value::Boolean(true));
        config.rules.insert("MD009".to_string(), rule_config);

        let rule = MD009TrailingSpaces::from_config(&config);
        let content = "Line   ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);

        // In strict mode, should remove all spaces
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "Line");
    }

    /// With `list_item_empty_lines` enabled, blank lines directly after a list item may hold spaces.
    #[test]
    fn test_list_item_empty_lines() {
        // Create rule with list_item_empty_lines enabled
        let config = MD009Config {
            list_item_empty_lines: true,
            ..Default::default()
        };
        let rule = MD009TrailingSpaces::from_config_struct(config);

        // Test unordered list with empty line
        let content = "- First item\n  \n- Second item";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Should not flag the empty line with spaces after list item
        assert!(result.is_empty());

        // Test ordered list with empty line
        let content = "1. First item\n  \n2. Second item";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty());

        // Test that non-list empty lines are still flagged
        let content = "Normal paragraph\n  \nAnother paragraph";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    /// With the default configuration, blank lines after a list item are still flagged.
    #[test]
    fn test_list_item_empty_lines_disabled() {
        // Default config has list_item_empty_lines disabled
        let rule = MD009TrailingSpaces::default();

        let content = "- First item\n  \n- Second item";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Should flag the empty line with spaces
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    /// Checks a 1000-line document in which every line ends with a valid line break.
    #[test]
    fn test_performance_large_document() {
        let rule = MD009TrailingSpaces::default();
        let mut content = String::new();
        for i in 0..1000 {
            content.push_str(&format!("Line {i} with spaces  \n"));
        }
        let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Default br_spaces=2, so all lines with 2 spaces are OK
        assert_eq!(result.len(), 0);
    }

    /// Fixing leaves inline Markdown content untouched and only strips the final line's spaces.
    #[test]
    fn test_preserve_content_after_fix() {
        let rule = MD009TrailingSpaces::new(2, false);
        let content = "**Bold** text  \n*Italic* text  \n[Link](url)  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "**Bold** text  \n*Italic* text  \n[Link](url)");
    }

    /// Nested blockquote lines follow the same line-break and last-line rules.
    #[test]
    fn test_nested_blockquotes() {
        let rule = MD009TrailingSpaces::default();
        let content = "> > Nested  \n> >   \n> Normal  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Line 2 has empty blockquote, line 3 is last line without newline
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 2);
        assert_eq!(result[1].line, 3);

        let fixed = rule.fix(&ctx).unwrap();
        // Line 2 is normalized from 3 to br_spaces (2) trailing spaces; the last line is stripped
        assert_eq!(fixed, "> > Nested  \n> >  \n> Normal");
    }

    /// `check` treats CRLF-terminated lines like LF-terminated ones.
    #[test]
    fn test_windows_line_endings() {
        let rule = MD009TrailingSpaces::default();
        // Note: This test simulates Windows line endings behavior
        let content = "Line with spaces  \r\nAnother line  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Line 1 has 2 spaces (= br_spaces) so it's OK
        // Line 2 is last line without newline, so it's flagged
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }
}