rumdl_lib/rules/
md009_trailing_spaces.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::{LineIndex, calculate_trailing_range};
4use crate::utils::regex_cache::{ORDERED_LIST_MARKER_REGEX, UNORDERED_LIST_MARKER_REGEX, get_cached_regex};
5
6mod md009_config;
7use md009_config::MD009Config;
8
// Regexes are obtained on demand through `get_cached_regex`, so this module declares no `lazy_static` patterns.
10
11#[derive(Debug, Clone, Default)]
12pub struct MD009TrailingSpaces {
13    config: MD009Config,
14}
15
16impl MD009TrailingSpaces {
17    pub fn new(br_spaces: usize, strict: bool) -> Self {
18        Self {
19            config: MD009Config {
20                br_spaces,
21                strict,
22                list_item_empty_lines: false,
23            },
24        }
25    }
26
27    pub fn from_config_struct(config: MD009Config) -> Self {
28        Self { config }
29    }
30
31    fn count_trailing_spaces(line: &str) -> usize {
32        line.chars().rev().take_while(|&c| c == ' ').count()
33    }
34
35    fn is_empty_list_item_line(line: &str, prev_line: Option<&str>) -> bool {
36        // A line is an empty list item line if:
37        // 1. It's blank or only contains spaces
38        // 2. The previous line is a list item
39        if !line.trim().is_empty() {
40            return false;
41        }
42
43        if let Some(prev) = prev_line {
44            // Check for unordered list markers (*, -, +) with proper formatting
45            UNORDERED_LIST_MARKER_REGEX.is_match(prev) || ORDERED_LIST_MARKER_REGEX.is_match(prev)
46        } else {
47            false
48        }
49    }
50}
51
52impl Rule for MD009TrailingSpaces {
53    fn name(&self) -> &'static str {
54        "MD009"
55    }
56
57    fn description(&self) -> &'static str {
58        "Trailing spaces should be removed"
59    }
60
61    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
62        let content = ctx.content;
63        let _line_index = LineIndex::new(content.to_string());
64
65        let mut warnings = Vec::new();
66
67        let lines: Vec<&str> = content.lines().collect();
68
69        for (line_num, &line) in lines.iter().enumerate() {
70            let trailing_spaces = Self::count_trailing_spaces(line);
71
72            // Skip if no trailing spaces
73            if trailing_spaces == 0 {
74                continue;
75            }
76
77            // Handle empty lines
78            if line.trim().is_empty() {
79                if trailing_spaces > 0 {
80                    // Check if this is an empty list item line and config allows it
81                    let prev_line = if line_num > 0 { Some(lines[line_num - 1]) } else { None };
82                    if self.config.list_item_empty_lines && Self::is_empty_list_item_line(line, prev_line) {
83                        continue;
84                    }
85
86                    // Calculate precise character range for all trailing spaces on empty line
87                    let (start_line, start_col, end_line, end_col) = calculate_trailing_range(line_num + 1, line, 0);
88
89                    warnings.push(LintWarning {
90                        rule_name: Some(self.name()),
91                        line: start_line,
92                        column: start_col,
93                        end_line,
94                        end_column: end_col,
95                        message: "Empty line has trailing spaces".to_string(),
96                        severity: Severity::Warning,
97                        fix: Some(Fix {
98                            range: _line_index.line_col_to_byte_range_with_length(line_num + 1, 1, line.len()),
99                            replacement: String::new(),
100                        }),
101                    });
102                }
103                continue;
104            }
105
106            // Handle code blocks if not in strict mode
107            if !self.config.strict {
108                // Use pre-computed line info
109                if let Some(line_info) = ctx.line_info(line_num + 1)
110                    && line_info.in_code_block
111                {
112                    continue;
113                }
114            }
115
116            // Check if it's a valid line break
117            // Special handling: if the content ends with a newline, the last line from .lines()
118            // is not really the "last line" in terms of trailing spaces rules
119            let is_truly_last_line = line_num == lines.len() - 1 && !content.ends_with('\n');
120            if !self.config.strict && !is_truly_last_line && trailing_spaces == self.config.br_spaces {
121                continue;
122            }
123
124            // Check if this is an empty blockquote line ("> " or ">> " etc)
125            // These are allowed by MD028 to have a single trailing space
126            let trimmed = line.trim_end();
127            let is_empty_blockquote_with_space = trimmed.chars().all(|c| c == '>' || c == ' ' || c == '\t')
128                && trimmed.contains('>')
129                && trailing_spaces == 1;
130
131            if is_empty_blockquote_with_space {
132                continue; // Allow single trailing space for empty blockquote lines
133            }
134            // Calculate precise character range for all trailing spaces
135            let (start_line, start_col, end_line, end_col) =
136                calculate_trailing_range(line_num + 1, line, trimmed.len());
137
138            warnings.push(LintWarning {
139                rule_name: Some(self.name()),
140                line: start_line,
141                column: start_col,
142                end_line,
143                end_column: end_col,
144                message: if trailing_spaces == 1 {
145                    "Trailing space found".to_string()
146                } else {
147                    format!("{trailing_spaces} trailing spaces found")
148                },
149                severity: Severity::Warning,
150                fix: Some(Fix {
151                    range: _line_index.line_col_to_byte_range_with_length(
152                        line_num + 1,
153                        trimmed.len() + 1,
154                        trailing_spaces,
155                    ),
156                    replacement: if !self.config.strict && !is_truly_last_line && trailing_spaces > 0 {
157                        " ".repeat(self.config.br_spaces)
158                    } else {
159                        String::new()
160                    },
161                }),
162            });
163        }
164
165        Ok(warnings)
166    }
167
168    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
169        let content = ctx.content;
170
171        // For simple cases (strict mode), use fast regex approach
172        if self.config.strict {
173            // In strict mode, remove ALL trailing spaces everywhere
174            return Ok(get_cached_regex(r"(?m) +$")
175                .unwrap()
176                .replace_all(content, "")
177                .to_string());
178        }
179
180        // For complex cases, we need line-by-line processing but with optimizations
181        let lines: Vec<&str> = content.lines().collect();
182        let mut result = String::with_capacity(content.len()); // Pre-allocate capacity
183
184        for (i, line) in lines.iter().enumerate() {
185            // Fast path: if no trailing spaces, just add the line
186            if !line.ends_with(' ') {
187                result.push_str(line);
188                result.push('\n');
189                continue;
190            }
191
192            let trimmed = line.trim_end();
193            let trailing_spaces = Self::count_trailing_spaces(line);
194
195            // Handle empty lines - fast regex replacement
196            if trimmed.is_empty() {
197                // Check if this is an empty list item line and config allows it
198                let prev_line = if i > 0 { Some(lines[i - 1]) } else { None };
199                if self.config.list_item_empty_lines && Self::is_empty_list_item_line(line, prev_line) {
200                    result.push_str(line);
201                } else {
202                    // Remove all trailing spaces - line is empty so don't add anything
203                }
204                result.push('\n');
205                continue;
206            }
207
208            // Handle code blocks if not in strict mode
209            if let Some(line_info) = ctx.line_info(i + 1)
210                && line_info.in_code_block
211            {
212                result.push_str(line);
213                result.push('\n');
214                continue;
215            }
216
217            // No special handling for empty blockquote lines - treat them like regular lines
218
219            // Handle lines with trailing spaces
220            let is_truly_last_line = i == lines.len() - 1 && !content.ends_with('\n');
221
222            result.push_str(trimmed);
223
224            // Check if this line is a heading - headings should never have trailing spaces
225            let is_heading = if let Some(line_info) = ctx.line_info(i + 1) {
226                line_info.heading.is_some()
227            } else {
228                // Fallback: check if line starts with #
229                trimmed.starts_with('#')
230            };
231
232            // Check if this is an empty blockquote line (just ">")
233            let is_empty_blockquote = if let Some(line_info) = ctx.line_info(i + 1) {
234                line_info.blockquote.as_ref().is_some_and(|bq| bq.content.is_empty())
235            } else {
236                false
237            };
238
239            // In non-strict mode, preserve line breaks by normalizing to br_spaces
240            // BUT: Never preserve trailing spaces in headings or empty blockquotes as they serve no purpose
241            if !self.config.strict && !is_truly_last_line && trailing_spaces > 0 && !is_heading && !is_empty_blockquote
242            {
243                // Optimize for common case of 2 spaces
244                match self.config.br_spaces {
245                    0 => {}
246                    1 => result.push(' '),
247                    2 => result.push_str("  "),
248                    n => result.push_str(&" ".repeat(n)),
249                }
250            }
251            result.push('\n');
252        }
253
254        // Preserve original ending (with or without final newline)
255        if !content.ends_with('\n') && result.ends_with('\n') {
256            result.pop();
257        }
258
259        Ok(result)
260    }
261
262    fn as_any(&self) -> &dyn std::any::Any {
263        self
264    }
265
266    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
267        // Skip if content is empty or has no spaces at all
268        ctx.content.is_empty() || !ctx.content.contains(' ')
269    }
270
271    fn category(&self) -> RuleCategory {
272        RuleCategory::Whitespace
273    }
274
275    fn default_config_section(&self) -> Option<(String, toml::Value)> {
276        let default_config = MD009Config::default();
277        let json_value = serde_json::to_value(&default_config).ok()?;
278        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
279
280        if let toml::Value::Table(table) = toml_value {
281            if !table.is_empty() {
282                Some((MD009Config::RULE_NAME.to_string(), toml::Value::Table(table)))
283            } else {
284                None
285            }
286        } else {
287            None
288        }
289    }
290
291    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
292    where
293        Self: Sized,
294    {
295        let rule_config = crate::rule_config_serde::load_rule_config::<MD009Config>(config);
296        Box::new(Self::from_config_struct(rule_config))
297    }
298}
299
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::rule::Rule;

    /// Runs `check` on `content` (standard flavor) and returns the warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` (standard flavor) and returns the fixed text.
    fn autofix(rule: &dyn Rule, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_no_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "This is a line\nAnother line\nNo trailing spaces");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_basic_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "Line with spaces   \nAnother line  \nClean line");
        // The default br_spaces is 2, so only the three-space line is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].message, "3 trailing spaces found");
    }

    #[test]
    fn test_fix_basic_trailing_spaces() {
        let rule = MD009TrailingSpaces::default();
        let fixed = autofix(&rule, "Line with spaces   \nAnother line  \nClean line");
        assert_eq!(fixed, "Line with spaces  \nAnother line  \nClean line");
    }

    #[test]
    fn test_strict_mode() {
        let rule = MD009TrailingSpaces::new(2, true);
        let content = "Line with spaces  \nCode block:  \n```  \nCode with spaces  \n```  ";

        // Strict mode reports every line, code block included.
        assert_eq!(lint(&rule, content).len(), 5);

        assert_eq!(
            autofix(&rule, content),
            "Line with spaces\nCode block:\n```\nCode with spaces\n```"
        );
    }

    #[test]
    fn test_non_strict_mode_with_code_blocks() {
        let rule = MD009TrailingSpaces::new(2, false);
        let warnings = lint(
            &rule,
            "Line with spaces  \n```\nCode with spaces  \n```\nOutside code  ",
        );
        // Line 1 equals br_spaces, the code block is ignored, and line 5 is
        // the unterminated last line, so it alone is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5);
    }

    #[test]
    fn test_br_spaces_preservation() {
        let rule = MD009TrailingSpaces::new(2, false);
        let content = "Line with two spaces  \nLine with three spaces   \nLine with one space ";

        // Line 1 (exactly br_spaces) passes; line 2 (three spaces) and the
        // unterminated last line (one space) are reported.
        let warnings = lint(&rule, content);
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[1].line, 3);

        // Fix: line 1 untouched, line 2 normalized to two spaces, line 3 stripped.
        assert_eq!(
            autofix(&rule, content),
            "Line with two spaces  \nLine with three spaces  \nLine with one space"
        );
    }

    #[test]
    fn test_empty_lines_with_spaces() {
        let rule = MD009TrailingSpaces::default();
        let content = "Normal line\n   \n  \nAnother line";

        let warnings = lint(&rule, content);
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].message, "Empty line has trailing spaces");
        assert_eq!(warnings[1].message, "Empty line has trailing spaces");

        assert_eq!(autofix(&rule, content), "Normal line\n\n\nAnother line");
    }

    #[test]
    fn test_empty_blockquote_lines() {
        let rule = MD009TrailingSpaces::default();
        let content = "> Quote\n>   \n> More quote";

        let warnings = lint(&rule, content);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[0].message, "3 trailing spaces found");

        // The marker-only line loses all of its trailing spaces.
        assert_eq!(autofix(&rule, content), "> Quote\n>\n> More quote");
    }

    #[test]
    fn test_last_line_handling() {
        let rule = MD009TrailingSpaces::new(2, false);

        // Without a final newline the last line may not keep its spaces.
        let unterminated = "First line  \nLast line  ";
        let warnings = lint(&rule, unterminated);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(autofix(&rule, unterminated), "First line  \nLast line");

        // With a final newline both lines are valid line breaks.
        let terminated = "First line  \nLast line  \n";
        assert!(lint(&rule, terminated).is_empty());
    }

    #[test]
    fn test_single_trailing_space() {
        let rule = MD009TrailingSpaces::new(2, false);
        let warnings = lint(&rule, "Line with one space ");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Trailing space found");
    }

    #[test]
    fn test_tabs_not_spaces() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "Line with tab\t\nLine with spaces  ");
        // A trailing tab is not a trailing space; only line 2 is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_mixed_content() {
        let rule = MD009TrailingSpaces::new(2, false);
        // The heading ends with three spaces (more than br_spaces = 2); the
        // rest of the document has no trailing spaces.
        let content = format!(
            "# Heading{}\n{}",
            " ".repeat(3),
            "Normal paragraph\n> Blockquote\n>\n```\nCode block\n```\n- List item\n"
        );

        let warnings = lint(&rule, &content);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("trailing spaces"));
    }

    #[test]
    fn test_column_positions() {
        let rule = MD009TrailingSpaces::default();
        let warnings = lint(&rule, "Text   ");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 5); // First column after "Text"
        assert_eq!(warnings[0].end_column, 8); // One past the last space
    }

    #[test]
    fn test_default_config() {
        let rule = MD009TrailingSpaces::default();
        let section = rule.default_config_section();
        assert!(section.is_some());
        let (name, _value) = section.unwrap();
        assert_eq!(name, "MD009");
    }

    #[test]
    fn test_from_config() {
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("br_spaces".to_string(), toml::Value::Integer(3));
        rule_config
            .values
            .insert("strict".to_string(), toml::Value::Boolean(true));

        let mut config = crate::config::Config::default();
        config.rules.insert("MD009".to_string(), rule_config);

        let rule = MD009TrailingSpaces::from_config(&config);
        let content = "Line   ";
        assert_eq!(lint(rule.as_ref(), content).len(), 1);

        // Strict mode strips every trailing space.
        assert_eq!(autofix(rule.as_ref(), content), "Line");
    }

    #[test]
    fn test_list_item_empty_lines() {
        let rule = MD009TrailingSpaces::from_config_struct(MD009Config {
            list_item_empty_lines: true,
            ..Default::default()
        });

        // Whitespace-only line after an unordered list item is tolerated.
        assert!(lint(&rule, "- First item\n  \n- Second item").is_empty());

        // Same for an ordered list item.
        assert!(lint(&rule, "1. First item\n  \n2. Second item").is_empty());

        // Outside a list the whitespace-only line is still reported.
        let warnings = lint(&rule, "Normal paragraph\n  \nAnother paragraph");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_list_item_empty_lines_disabled() {
        // The default configuration leaves list_item_empty_lines off.
        let rule = MD009TrailingSpaces::default();

        let warnings = lint(&rule, "- First item\n  \n- Second item");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_performance_large_document() {
        let rule = MD009TrailingSpaces::default();
        let content: String = (0..1000).map(|i| format!("Line {i} with spaces  \n")).collect();

        // Every line ends with exactly br_spaces (2) spaces, so nothing is reported.
        assert_eq!(lint(&rule, &content).len(), 0);
    }

    #[test]
    fn test_preserve_content_after_fix() {
        let rule = MD009TrailingSpaces::new(2, false);
        let fixed = autofix(&rule, "**Bold** text  \n*Italic* text  \n[Link](url)  ");
        assert_eq!(fixed, "**Bold** text  \n*Italic* text  \n[Link](url)");
    }

    #[test]
    fn test_nested_blockquotes() {
        let rule = MD009TrailingSpaces::default();
        let content = "> > Nested  \n> >   \n> Normal  ";

        // Line 2 has three spaces after the markers; line 3 is the
        // unterminated last line.
        let warnings = lint(&rule, content);
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[1].line, 3);

        // Line 2 is normalized to two trailing spaces; line 3 is stripped.
        assert_eq!(autofix(&rule, content), "> > Nested  \n> >  \n> Normal");
    }

    #[test]
    fn test_windows_line_endings() {
        let rule = MD009TrailingSpaces::default();
        // CRLF after the first line.
        let warnings = lint(&rule, "Line with spaces  \r\nAnother line  ");
        // Line 1 has exactly br_spaces; line 2 is the unterminated last line.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }
}