Skip to main content

reformat_core/
endings.rs

1//! Line ending normalization transformer
2
3use std::fs;
4use std::path::Path;
5use walkdir::WalkDir;
6
/// Line ending style
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineEnding {
    /// Unix-style: `\n`
    Lf,
    /// Windows-style: `\r\n`
    Crlf,
    /// Classic Mac-style: `\r`
    Cr,
}

impl LineEnding {
    /// Parses a style from its textual name, ignoring ASCII case.
    ///
    /// Accepted names are `lf`/`unix`, `crlf`/`windows` and `cr`/`mac`.
    /// The input is not trimmed; any other string (including the empty
    /// string) yields `None`.
    pub fn parse(s: &str) -> Option<Self> {
        // Lower-casing once lets user input such as "Unix" or "Crlf" match
        // without enumerating every casing by hand.
        match s.to_ascii_lowercase().as_str() {
            "lf" | "unix" => Some(Self::Lf),
            "crlf" | "windows" => Some(Self::Crlf),
            "cr" | "mac" => Some(Self::Cr),
            _ => None,
        }
    }

    /// Returns the byte sequence that represents this line ending.
    ///
    /// The result is a static literal, so it does not borrow from `self`.
    fn as_bytes(&self) -> &'static [u8] {
        match self {
            Self::Lf => b"\n",
            Self::Crlf => b"\r\n",
            Self::Cr => b"\r",
        }
    }
}
37
/// Options for line ending normalization
///
/// The `Default` implementation targets LF, processes directories
/// recursively, writes changes to disk, and covers a built-in list of
/// common source and text file extensions.
#[derive(Debug, Clone)]
pub struct EndingsOptions {
    /// Target line ending style every detected line ending is rewritten to
    pub style: LineEnding,
    /// File extensions to process. Each entry includes the leading dot
    /// (e.g. `".rs"`) and is compared exactly against the file's extension.
    pub file_extensions: Vec<String>,
    /// Process directories recursively; when `false`, only the direct
    /// entries of the given directory are considered
    pub recursive: bool,
    /// Dry run mode (don't modify files); changes are still counted and
    /// reported
    pub dry_run: bool,
}
50
51impl Default for EndingsOptions {
52    fn default() -> Self {
53        EndingsOptions {
54            style: LineEnding::Lf,
55            file_extensions: vec![
56                ".py", ".pyx", ".pxd", ".pxi", ".c", ".h", ".cpp", ".hpp", ".rs", ".go", ".java",
57                ".js", ".ts", ".jsx", ".tsx", ".md", ".qmd", ".txt", ".toml", ".yaml", ".yml",
58                ".json", ".xml", ".html", ".css", ".sh", ".bat",
59            ]
60            .iter()
61            .map(|s| s.to_string())
62            .collect(),
63            recursive: true,
64            dry_run: false,
65        }
66    }
67}
68
/// Line ending normalizer
///
/// Holds the [`EndingsOptions`] that decide which files are touched and
/// which line ending style they are rewritten to.
pub struct EndingsNormalizer {
    // Configuration fixed at construction time.
    options: EndingsOptions,
}
73
74impl EndingsNormalizer {
75    /// Creates a new normalizer with the given options
76    pub fn new(options: EndingsOptions) -> Self {
77        EndingsNormalizer { options }
78    }
79
80    /// Creates a normalizer with default options
81    pub fn with_defaults() -> Self {
82        EndingsNormalizer {
83            options: EndingsOptions::default(),
84        }
85    }
86
87    /// Checks if a file should be processed
88    fn should_process(&self, path: &Path) -> bool {
89        if !path.is_file() {
90            return false;
91        }
92
93        // Skip hidden files
94        if path.components().any(|c| {
95            c.as_os_str()
96                .to_str()
97                .map(|s| s.starts_with('.'))
98                .unwrap_or(false)
99        }) {
100            return false;
101        }
102
103        // Skip build directories
104        let skip_dirs = [
105            "build",
106            "__pycache__",
107            ".git",
108            "node_modules",
109            "venv",
110            ".venv",
111            "target",
112        ];
113        if path.components().any(|c| {
114            c.as_os_str()
115                .to_str()
116                .map(|s| skip_dirs.contains(&s))
117                .unwrap_or(false)
118        }) {
119            return false;
120        }
121
122        if let Some(ext) = path.extension() {
123            let ext_str = format!(".{}", ext.to_string_lossy());
124            self.options.file_extensions.contains(&ext_str)
125        } else {
126            false
127        }
128    }
129
130    /// Normalize line endings in a single file. Returns the number of lines changed.
131    pub fn normalize_file(&self, path: &Path) -> crate::Result<usize> {
132        if !self.should_process(path) {
133            return Ok(0);
134        }
135
136        let bytes = fs::read(path)?;
137
138        // Detect if file is binary (contains null bytes)
139        if bytes.contains(&0) {
140            return Ok(0);
141        }
142
143        let target = self.options.style;
144        let target_bytes = target.as_bytes();
145
146        // Split into lines preserving original endings for counting
147        let mut changed = 0usize;
148        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
149        let mut i = 0;
150
151        while i < bytes.len() {
152            if bytes[i] == b'\r' {
153                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
154                    // CRLF
155                    if target != LineEnding::Crlf {
156                        changed += 1;
157                    }
158                    output.extend_from_slice(target_bytes);
159                    i += 2;
160                } else {
161                    // CR only
162                    if target != LineEnding::Cr {
163                        changed += 1;
164                    }
165                    output.extend_from_slice(target_bytes);
166                    i += 1;
167                }
168            } else if bytes[i] == b'\n' {
169                // LF only
170                if target != LineEnding::Lf {
171                    changed += 1;
172                }
173                output.extend_from_slice(target_bytes);
174                i += 1;
175            } else {
176                output.push(bytes[i]);
177                i += 1;
178            }
179        }
180
181        if changed > 0 {
182            if self.options.dry_run {
183                println!(
184                    "Would normalize {} line ending(s) in '{}'",
185                    changed,
186                    path.display()
187                );
188            } else {
189                fs::write(path, output)?;
190                println!(
191                    "Normalized {} line ending(s) in '{}'",
192                    changed,
193                    path.display()
194                );
195            }
196        }
197
198        Ok(changed)
199    }
200
201    /// Processes a directory or file. Returns (files_changed, endings_changed).
202    pub fn process(&self, path: &Path) -> crate::Result<(usize, usize)> {
203        let mut total_files = 0;
204        let mut total_endings = 0;
205
206        if path.is_file() {
207            let endings = self.normalize_file(path)?;
208            if endings > 0 {
209                total_files = 1;
210                total_endings = endings;
211            }
212        } else if path.is_dir() {
213            if self.options.recursive {
214                for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
215                    if entry.file_type().is_file() {
216                        let endings = self.normalize_file(entry.path())?;
217                        if endings > 0 {
218                            total_files += 1;
219                            total_endings += endings;
220                        }
221                    }
222                }
223            } else {
224                for entry in fs::read_dir(path)? {
225                    let entry = entry?;
226                    let entry_path = entry.path();
227                    if entry_path.is_file() {
228                        let endings = self.normalize_file(&entry_path)?;
229                        if endings > 0 {
230                            total_files += 1;
231                            total_endings += endings;
232                        }
233                    }
234                }
235            }
236        }
237
238        Ok((total_files, total_endings))
239    }
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// Per-test scratch directory under the system temp dir.
    ///
    /// The directory name includes the process id so concurrent test runs do
    /// not collide. The directory is removed on drop, so cleanup also
    /// happens when an assertion panics.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Creates a fresh, empty scratch directory for the test `name`.
        fn new(name: &str) -> Self {
            let dir = std::env::temp_dir().join(format!(
                "reformat_endings_{}_{}",
                name,
                std::process::id()
            ));
            // Best effort: clear leftovers from an earlier, killed run.
            let _ = fs::remove_dir_all(&dir);
            fs::create_dir_all(&dir).unwrap();
            ScratchDir(dir)
        }

        /// Root of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }

        /// Writes `contents` to `relative` inside the scratch directory,
        /// creating parent directories as needed, and returns the full path.
        fn write(&self, relative: &str, contents: &[u8]) -> PathBuf {
            let file = self.0.join(relative);
            if let Some(parent) = file.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&file, contents).unwrap();
            file
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Ignore errors: failing to clean up must not mask a test result.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    #[test]
    fn test_crlf_to_lf() {
        let scratch = ScratchDir::new("crlf_lf");
        let file = scratch.write("test.txt", b"line1\r\nline2\r\nline3\r\n");

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, endings) = normalizer.process(&file).unwrap();

        assert_eq!(files, 1);
        assert_eq!(endings, 3);
        assert_eq!(fs::read(&file).unwrap(), b"line1\nline2\nline3\n");
    }

    #[test]
    fn test_lf_to_crlf() {
        let scratch = ScratchDir::new("lf_crlf");
        let file = scratch.write("test.txt", b"line1\nline2\nline3\n");

        let options = EndingsOptions {
            style: LineEnding::Crlf,
            ..Default::default()
        };
        let normalizer = EndingsNormalizer::new(options);
        let (files, endings) = normalizer.process(&file).unwrap();

        assert_eq!(files, 1);
        assert_eq!(endings, 3);
        assert_eq!(fs::read(&file).unwrap(), b"line1\r\nline2\r\nline3\r\n");
    }

    #[test]
    fn test_cr_to_lf() {
        let scratch = ScratchDir::new("cr_lf");
        let file = scratch.write("test.txt", b"line1\rline2\rline3\r");

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, endings) = normalizer.process(&file).unwrap();

        assert_eq!(files, 1);
        assert_eq!(endings, 3);
        assert_eq!(fs::read(&file).unwrap(), b"line1\nline2\nline3\n");
    }

    #[test]
    fn test_mixed_endings() {
        let scratch = ScratchDir::new("mixed");
        let file = scratch.write("test.txt", b"line1\r\nline2\nline3\rline4\n");

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, endings) = normalizer.process(&file).unwrap();

        assert_eq!(files, 1);
        assert_eq!(endings, 2); // CRLF and CR converted, LFs already correct
        assert_eq!(fs::read(&file).unwrap(), b"line1\nline2\nline3\nline4\n");
    }

    #[test]
    fn test_already_normalized() {
        let scratch = ScratchDir::new("noop");
        let file = scratch.write("test.txt", b"line1\nline2\nline3\n");

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, endings) = normalizer.process(&file).unwrap();

        assert_eq!(files, 0);
        assert_eq!(endings, 0);
    }

    #[test]
    fn test_dry_run() {
        let scratch = ScratchDir::new("dry");
        let original = b"line1\r\nline2\r\n";
        let file = scratch.write("test.txt", original);

        let options = EndingsOptions {
            dry_run: true,
            ..Default::default()
        };
        let normalizer = EndingsNormalizer::new(options);
        let (_, endings) = normalizer.process(&file).unwrap();

        assert_eq!(endings, 2);

        // File should be unchanged
        assert_eq!(fs::read(&file).unwrap(), original);
    }

    #[test]
    fn test_skip_binary_files() {
        let scratch = ScratchDir::new("binary");
        let mut content = b"line1\r\nline2\r\n".to_vec();
        content.push(0); // null byte makes it binary
        let file = scratch.write("test.txt", &content);

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, _) = normalizer.process(&file).unwrap();

        assert_eq!(files, 0);
        // Skipped files must be left byte-for-byte intact
        assert_eq!(fs::read(&file).unwrap(), content);
    }

    #[test]
    fn test_skip_hidden_files() {
        let scratch = ScratchDir::new("hidden");
        let file = scratch.write(".hidden.txt", b"line1\r\n");

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, _) = normalizer.process(&file).unwrap();

        assert_eq!(files, 0);
        // Skipped files must be left byte-for-byte intact
        assert_eq!(fs::read(&file).unwrap(), b"line1\r\n");
    }

    #[test]
    fn test_recursive_processing() {
        let scratch = ScratchDir::new("recursive");
        scratch.write("a.txt", b"a\r\n");
        scratch.write("sub/b.txt", b"b\r\n");

        let normalizer = EndingsNormalizer::with_defaults();
        let (files, endings) = normalizer.process(scratch.path()).unwrap();

        assert_eq!(files, 2);
        assert_eq!(endings, 2);
    }

    #[test]
    fn test_parse_line_ending() {
        assert_eq!(LineEnding::parse("lf"), Some(LineEnding::Lf));
        assert_eq!(LineEnding::parse("LF"), Some(LineEnding::Lf));
        assert_eq!(LineEnding::parse("unix"), Some(LineEnding::Lf));
        assert_eq!(LineEnding::parse("crlf"), Some(LineEnding::Crlf));
        assert_eq!(LineEnding::parse("CRLF"), Some(LineEnding::Crlf));
        assert_eq!(LineEnding::parse("windows"), Some(LineEnding::Crlf));
        assert_eq!(LineEnding::parse("cr"), Some(LineEnding::Cr));
        assert_eq!(LineEnding::parse("CR"), Some(LineEnding::Cr));
        assert_eq!(LineEnding::parse("mac"), Some(LineEnding::Cr));
        assert_eq!(LineEnding::parse("bogus"), None);
    }
}