Skip to main content

reformat_core/
whitespace.rs

//! Whitespace cleaning transformer
2
use std::ffi::OsStr;
use std::fs;
use std::path::{Component, Path};
use walkdir::WalkDir;
6
/// Options controlling how [`WhitespaceCleaner`] selects and rewrites files
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WhitespaceOptions {
    /// Remove trailing whitespace from lines
    pub remove_trailing: bool,
    /// File extensions to process, each written with its leading dot (e.g. `.rs`)
    pub file_extensions: Vec<String>,
    /// Process directories recursively
    pub recursive: bool,
    /// Dry run mode (report what would change, but don't modify files)
    pub dry_run: bool,
}
19
20impl Default for WhitespaceOptions {
21    fn default() -> Self {
22        WhitespaceOptions {
23            remove_trailing: true,
24            file_extensions: vec![
25                ".py", ".pyx", ".pxd", ".pxi", ".c", ".h", ".cpp", ".hpp", ".rs", ".go", ".java",
26                ".js", ".ts", ".jsx", ".tsx", ".md", ".qmd", ".txt",
27            ]
28            .iter()
29            .map(|s| s.to_string())
30            .collect(),
31            recursive: true,
32            dry_run: false,
33        }
34    }
35}
36
37/// Whitespace cleaner for removing trailing whitespace from files
38pub struct WhitespaceCleaner {
39    options: WhitespaceOptions,
40}
41
42impl WhitespaceCleaner {
43    /// Creates a new whitespace cleaner with the given options
44    pub fn new(options: WhitespaceOptions) -> Self {
45        WhitespaceCleaner { options }
46    }
47
48    /// Creates a cleaner with default options
49    pub fn with_defaults() -> Self {
50        WhitespaceCleaner {
51            options: WhitespaceOptions::default(),
52        }
53    }
54
55    /// Checks if a file should be processed
56    fn should_process(&self, path: &Path) -> bool {
57        if !path.is_file() {
58            return false;
59        }
60
61        // Skip hidden files and directories
62        if path.components().any(|c| {
63            c.as_os_str()
64                .to_str()
65                .map(|s| s.starts_with('.'))
66                .unwrap_or(false)
67        }) {
68            return false;
69        }
70
71        // Skip build directories
72        let skip_dirs = [
73            "build",
74            "__pycache__",
75            ".git",
76            "node_modules",
77            "venv",
78            ".venv",
79            "target",
80        ];
81        if path.components().any(|c| {
82            c.as_os_str()
83                .to_str()
84                .map(|s| skip_dirs.contains(&s))
85                .unwrap_or(false)
86        }) {
87            return false;
88        }
89
90        // Check file extension
91        if let Some(ext) = path.extension() {
92            let ext_str = format!(".{}", ext.to_string_lossy());
93            self.options.file_extensions.contains(&ext_str)
94        } else {
95            false
96        }
97    }
98
99    /// Removes trailing whitespace from a single file
100    pub fn clean_file(&self, path: &Path) -> crate::Result<usize> {
101        if !self.should_process(path) {
102            return Ok(0);
103        }
104
105        let content = fs::read_to_string(path)?;
106        let lines: Vec<&str> = content.lines().collect();
107        let mut cleaned_lines = Vec::new();
108        let mut modified_count = 0;
109
110        for line in &lines {
111            if self.options.remove_trailing {
112                let cleaned = line.trim_end();
113                if cleaned != *line {
114                    modified_count += 1;
115                }
116                cleaned_lines.push(cleaned);
117            } else {
118                cleaned_lines.push(*line);
119            }
120        }
121
122        // Check if file ends with newline
123        let ends_with_newline = content.ends_with('\n');
124
125        if modified_count > 0 {
126            if self.options.dry_run {
127                println!(
128                    "Would clean {} lines in '{}'",
129                    modified_count,
130                    path.display()
131                );
132            } else {
133                let mut cleaned_content = cleaned_lines.join("\n");
134                if ends_with_newline {
135                    cleaned_content.push('\n');
136                }
137                fs::write(path, cleaned_content)?;
138                println!("Cleaned {} lines in '{}'", modified_count, path.display());
139            }
140        }
141
142        Ok(modified_count)
143    }
144
145    /// Processes a directory or file
146    pub fn process(&self, path: &Path) -> crate::Result<(usize, usize)> {
147        let mut total_files = 0;
148        let mut total_lines = 0;
149
150        if path.is_file() {
151            let lines = self.clean_file(path)?;
152            if lines > 0 {
153                total_files = 1;
154                total_lines = lines;
155            }
156        } else if path.is_dir() {
157            if self.options.recursive {
158                for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
159                    if entry.file_type().is_file() {
160                        let lines = self.clean_file(entry.path())?;
161                        if lines > 0 {
162                            total_files += 1;
163                            total_lines += lines;
164                        }
165                    }
166                }
167            } else {
168                for entry in fs::read_dir(path)? {
169                    let entry = entry?;
170                    let entry_path = entry.path();
171                    if entry_path.is_file() {
172                        let lines = self.clean_file(&entry_path)?;
173                        if lines > 0 {
174                            total_files += 1;
175                            total_lines += lines;
176                        }
177                    }
178                }
179            }
180        }
181
182        Ok((total_files, total_lines))
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates (or reuses) a named scratch directory under the system temp dir.
    fn scratch_dir(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(name);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn test_remove_trailing_whitespace() {
        let test_dir = scratch_dir("reformat_whitespace_test");
        let test_file = test_dir.join("test.txt");
        fs::write(&test_file, "line1   \nline2\t\nline3\n").unwrap();

        let cleaner = WhitespaceCleaner::with_defaults();
        let (files, lines) = cleaner.process(&test_file).unwrap();

        assert_eq!(files, 1);
        assert_eq!(lines, 2); // line1 and line2 had trailing whitespace

        let content = fs::read_to_string(&test_file).unwrap();
        assert_eq!(content, "line1\nline2\nline3\n");

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_preserve_line_endings() {
        let test_dir = scratch_dir("reformat_whitespace_endings");
        let test_file = test_dir.join("test.txt");
        fs::write(&test_file, "line1  \nline2\n").unwrap();

        let cleaner = WhitespaceCleaner::with_defaults();
        cleaner.process(&test_file).unwrap();

        let content = fs::read_to_string(&test_file).unwrap();
        assert!(content.ends_with('\n'));
        assert_eq!(content, "line1\nline2\n");

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_missing_final_newline_is_preserved() {
        let test_dir = scratch_dir("reformat_whitespace_no_final_newline");
        let test_file = test_dir.join("test.txt");
        fs::write(&test_file, "line1  \nline2  ").unwrap();

        let cleaner = WhitespaceCleaner::with_defaults();
        let (files, lines) = cleaner.process(&test_file).unwrap();

        assert_eq!(files, 1);
        assert_eq!(lines, 2);

        // No newline is invented at the end of the file
        let content = fs::read_to_string(&test_file).unwrap();
        assert_eq!(content, "line1\nline2");

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_dry_run_mode() {
        let test_dir = scratch_dir("reformat_whitespace_dry");
        let test_file = test_dir.join("test.txt");
        let original = "line1   \nline2\n";
        fs::write(&test_file, original).unwrap();

        let opts = WhitespaceOptions {
            dry_run: true,
            ..WhitespaceOptions::default()
        };

        let cleaner = WhitespaceCleaner::new(opts);
        cleaner.process(&test_file).unwrap();

        // File should be unchanged
        let content = fs::read_to_string(&test_file).unwrap();
        assert_eq!(content, original);

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_skip_hidden_files() {
        let test_dir = scratch_dir("reformat_whitespace_hidden");
        let hidden_file = test_dir.join(".hidden.txt");
        fs::write(&hidden_file, "line1   \n").unwrap();

        let cleaner = WhitespaceCleaner::with_defaults();
        let (files, _) = cleaner.process(&hidden_file).unwrap();

        // Hidden file should be skipped
        assert_eq!(files, 0);

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_file_extension_filtering() {
        let test_dir = scratch_dir("reformat_whitespace_ext");
        let txt_file = test_dir.join("test.txt");
        let other_file = test_dir.join("test.xyz");

        fs::write(&txt_file, "line1   \n").unwrap();
        fs::write(&other_file, "line1   \n").unwrap();

        let opts = WhitespaceOptions {
            file_extensions: vec![".txt".to_string()],
            ..WhitespaceOptions::default()
        };

        let cleaner = WhitespaceCleaner::new(opts);
        let (files, _) = cleaner.process(&test_dir).unwrap();

        // Only .txt should be processed
        assert_eq!(files, 1);

        let txt_content = fs::read_to_string(&txt_file).unwrap();
        let other_content = fs::read_to_string(&other_file).unwrap();

        assert_eq!(txt_content, "line1\n");
        assert_eq!(other_content, "line1   \n"); // Unchanged

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_recursive_processing() {
        let test_dir = scratch_dir("reformat_whitespace_recursive");
        let sub_dir = test_dir.join("subdir");
        fs::create_dir_all(&sub_dir).unwrap();

        let file1 = test_dir.join("file1.txt");
        let file2 = sub_dir.join("file2.txt");

        fs::write(&file1, "line1   \n").unwrap();
        fs::write(&file2, "line2\t\n").unwrap();

        let cleaner = WhitespaceCleaner::with_defaults();
        let (files, lines) = cleaner.process(&test_dir).unwrap();

        assert_eq!(files, 2);
        assert_eq!(lines, 2);

        fs::remove_dir_all(&test_dir).unwrap();
    }

    #[test]
    fn test_non_recursive_skips_subdirectories() {
        let test_dir = scratch_dir("reformat_whitespace_non_recursive");
        let sub_dir = test_dir.join("subdir");
        fs::create_dir_all(&sub_dir).unwrap();

        let file1 = test_dir.join("file1.txt");
        let file2 = sub_dir.join("file2.txt");

        fs::write(&file1, "line1   \n").unwrap();
        fs::write(&file2, "line2\t\n").unwrap();

        let opts = WhitespaceOptions {
            recursive: false,
            ..WhitespaceOptions::default()
        };

        let cleaner = WhitespaceCleaner::new(opts);
        let (files, lines) = cleaner.process(&test_dir).unwrap();

        // Only the top-level file is cleaned
        assert_eq!(files, 1);
        assert_eq!(lines, 1);
        assert_eq!(fs::read_to_string(&file1).unwrap(), "line1\n");
        assert_eq!(fs::read_to_string(&file2).unwrap(), "line2\t\n"); // Unchanged

        fs::remove_dir_all(&test_dir).unwrap();
    }
}