1use anyhow::{Context, Result};
2use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
3use ra_ap_rustc_lexer::{strip_shebang, tokenize, FrontmatterAllowed, TokenKind};
4use std::collections::HashSet;
5use std::ffi::OsStr;
6use std::fs;
7use std::path::{Path, PathBuf};
8use walkdir::{DirEntry, WalkDir};
9
/// Settings shared by [`run`], [`remove_backups`] and [`restore_backups`].
#[derive(Debug, Clone)]
pub struct Config {
    /// Paths handed to the directory walker, one walk per entry.
    pub roots: Vec<PathBuf>,
    /// When set, changed files are reported but never written, and [`run`]
    /// fails if at least one file would change.
    pub check: bool,
    /// When set, the path of every changed, removed or restored file is printed.
    pub verbose: bool,
    /// When unset, directories below a root whose name starts with `.` are skipped.
    pub hidden: bool,
    /// Forwarded to the directory walker's `follow_links` option.
    pub follow_links: bool,
    /// When set, no backup copy is made before a file is rewritten.
    pub no_backup: bool,
    /// When set, doc comments are stripped together with ordinary comments.
    pub strip_doc_comments: bool,
    /// Text appended to a file name to form its backup name; also used to
    /// recognise backup files when removing or restoring them.
    pub backup_suffix: String,
    /// Directory names (compared against the bare name) that are never entered.
    pub exclude_dirs: Vec<String>,
    /// Glob patterns selecting which Rust files are processed; an empty list
    /// selects every Rust file.
    pub include_globs: Vec<String>,
}
30
/// Counters returned by [`run`], [`remove_backups`] and [`restore_backups`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RunStats {
    /// Number of files that were selected for processing.
    pub files_seen: usize,
    /// Number of files that were rewritten (or would be, in check mode),
    /// removed, or restored.
    pub files_changed: usize,
}
36
37impl Config {
38 pub fn validate(&self) -> Result<()> {
39 anyhow::ensure!(
40 self.no_backup || !self.backup_suffix.is_empty(),
41 "backup suffix must not be empty"
42 );
43 Ok(())
44 }
45}
46
47pub fn run(cfg: &Config) -> Result<RunStats> {
48 cfg.validate()?;
49 let include = build_include_set(&cfg.include_globs)?;
50 let mut stats = RunStats::default();
51 let mut seen = HashSet::new();
52
53 for root in &cfg.roots {
54 let walker = WalkDir::new(root)
55 .follow_links(cfg.follow_links)
56 .into_iter()
57 .filter_entry(|e| !should_skip_dir(e, cfg.hidden, &cfg.exclude_dirs));
58
59 for entry in walker {
60 let entry =
61 entry.with_context(|| format!("failed while walking {}", root.display()))?;
62 if entry.file_type().is_file()
63 && is_rust_file(entry.path())
64 && is_included(entry.path(), root, &include)
65 && seen.insert(dedup_key(entry.path()))
66 {
67 stats.files_seen += 1;
68 if process_file(entry.path(), cfg)? {
69 stats.files_changed += 1;
70 }
71 }
72 }
73 }
74
75 if cfg.check && stats.files_changed > 0 {
76 anyhow::bail!("{} file(s) would change", stats.files_changed);
77 }
78
79 Ok(stats)
80}
81
82pub fn remove_backups(cfg: &Config) -> Result<RunStats> {
84 anyhow::ensure!(
85 !cfg.backup_suffix.is_empty(),
86 "backup suffix must not be empty"
87 );
88 let mut stats = RunStats::default();
89 let mut seen = HashSet::new();
90
91 for root in &cfg.roots {
92 let walker = WalkDir::new(root)
93 .follow_links(cfg.follow_links)
94 .into_iter()
95 .filter_entry(|e| !should_skip_dir(e, cfg.hidden, &cfg.exclude_dirs));
96
97 for entry in walker {
98 let entry =
99 entry.with_context(|| format!("failed while walking {}", root.display()))?;
100 if entry.file_type().is_file()
101 && is_backup_file(entry.path(), &cfg.backup_suffix)
102 && seen.insert(dedup_key(entry.path()))
103 {
104 stats.files_seen += 1;
105 if cfg.verbose {
106 println!("{}", entry.path().display());
107 }
108 fs::remove_file(entry.path())
109 .with_context(|| format!("failed to remove {}", entry.path().display()))?;
110 stats.files_changed += 1;
111 }
112 }
113 }
114
115 Ok(stats)
116}
117
118pub fn restore_backups(cfg: &Config) -> Result<RunStats> {
122 anyhow::ensure!(
123 !cfg.backup_suffix.is_empty(),
124 "backup suffix must not be empty"
125 );
126 let mut stats = RunStats::default();
127 let mut seen = HashSet::new();
128
129 for root in &cfg.roots {
130 let walker = WalkDir::new(root)
131 .follow_links(cfg.follow_links)
132 .into_iter()
133 .filter_entry(|e| !should_skip_dir(e, cfg.hidden, &cfg.exclude_dirs));
134
135 for entry in walker {
136 let entry =
137 entry.with_context(|| format!("failed while walking {}", root.display()))?;
138 if entry.file_type().is_file()
139 && is_backup_file(entry.path(), &cfg.backup_suffix)
140 && seen.insert(dedup_key(entry.path()))
141 {
142 let backup = entry.path();
143 let original = original_path(backup, &cfg.backup_suffix)?;
144 stats.files_seen += 1;
145 if cfg.verbose {
146 println!("{} -> {}", backup.display(), original.display());
147 }
148 fs::rename(backup, &original).with_context(|| {
149 format!(
150 "failed to restore {} -> {}",
151 backup.display(),
152 original.display()
153 )
154 })?;
155 stats.files_changed += 1;
156 }
157 }
158 }
159
160 Ok(stats)
161}
162
163pub fn strip_non_doc_comments(input: &str) -> Result<String> {
166 strip_comments(input, false)
167}
168
169pub fn strip_comments(input: &str, strip_docs: bool) -> Result<String> {
172 let mut output = String::with_capacity(input.len());
173
174 let mut protected: Vec<(usize, usize)> = Vec::new();
175 let mut offset = 0usize;
176
177 if let Some(shebang_len) = strip_shebang(input) {
178 output.push_str(&input[..shebang_len]);
179 offset = shebang_len;
180 }
181
182 let rest = &input[offset..];
183 let mut pos = 0usize;
184
185 let mut swallow_newline = false;
189
190 for token in tokenize(rest, FrontmatterAllowed::Yes) {
191 let len = token.len as usize;
192 let end = pos + len;
193 let text = &rest[pos..end];
194 pos = end;
195
196 let swallow = swallow_newline;
197 swallow_newline = false;
198
199 match token.kind {
200 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
201 if doc_style.is_some() && !strip_docs {
202 push_protected(&mut output, &mut protected, text);
203 } else if let Some(line_start) = blank_line_start(&output) {
204 output.truncate(line_start);
208 swallow_newline = true;
209 } else {
210 preserve_removed_comment(text, &mut output);
211 }
212 }
213 TokenKind::Literal { .. } | TokenKind::Frontmatter { .. } => {
214 push_protected(&mut output, &mut protected, text);
215 }
216 _ => {
217 let text = if swallow {
218 strip_one_leading_newline(text)
219 } else {
220 text
221 };
222 output.push_str(text);
223 }
224 }
225 }
226
227 anyhow::ensure!(pos == rest.len(), "lexer did not consume full input");
228 Ok(strip_trailing_whitespace(&output, &protected))
229}
230
/// Appends `text` to `output` and records the byte range it now occupies in
/// `protected`.
fn push_protected(output: &mut String, protected: &mut Vec<(usize, usize)>, text: &str) {
    let begin = output.len();
    let finish = begin + text.len();
    output.push_str(text);
    protected.push((begin, finish));
}
236
/// Returns `output` with runs of spaces and tabs removed wherever such a run
/// reaches the end of a line (`\n`, `\r`, or end of text).
///
/// Bytes inside any `protected` range are never removed and also terminate a
/// run. `protected` must be sorted by start offset, since it is binary-searched.
fn strip_trailing_whitespace(output: &str, protected: &[(usize, usize)]) -> String {
    let raw = output.as_bytes();
    let total = raw.len();

    // A byte may be stripped when it is a space or tab outside every
    // protected range.
    let strippable = |at: usize| -> bool {
        if !matches!(raw[at], b' ' | b'\t') {
            return false;
        }
        let upto = protected.partition_point(|&(begin, _)| begin <= at);
        match upto.checked_sub(1) {
            Some(last) => protected[last].1 <= at,
            None => true,
        }
    };

    let mut cleaned = String::with_capacity(total);
    let mut copied_to = 0usize;
    let mut cursor = 0usize;

    while cursor < total {
        if !strippable(cursor) {
            cursor += 1;
            continue;
        }

        let run_end = (cursor..total).find(|&k| !strippable(k)).unwrap_or(total);
        let at_line_end = match raw.get(run_end) {
            None => true,
            Some(&next) => next == b'\n' || next == b'\r',
        };
        if at_line_end {
            // Copy everything kept so far, then skip the run.
            cleaned.push_str(&output[copied_to..cursor]);
            copied_to = run_end;
        }
        cursor = run_end;
    }

    cleaned.push_str(&output[copied_to..]);
    cleaned
}
271
/// Returns the byte offset where the last line of `output` begins, provided
/// that line holds nothing but spaces and tabs (or is empty); otherwise `None`.
fn blank_line_start(output: &str) -> Option<usize> {
    let line_start = match output.rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    let only_indent = output[line_start..]
        .chars()
        .all(|c| matches!(c, ' ' | '\t'));
    only_indent.then_some(line_start)
}
284
/// Removes a single leading line break (`\r\n` or `\n`) from `text`, if present.
fn strip_one_leading_newline(text: &str) -> &str {
    for terminator in ["\r\n", "\n"] {
        if let Some(remainder) = text.strip_prefix(terminator) {
            return remainder;
        }
    }
    text
}
292
/// Emits into `out` the line breaks that a removed comment contained, so the
/// code after it stays on the same line number.
///
/// Each break is written in the style it had in `comment`: a `\n` preceded by
/// `\r` is written as `\r\n`, so CRLF files do not end up with mixed line
/// endings.
///
/// * For a `//` comment, only a line break at the very end of the text is kept.
/// * For any other comment, every line break inside the text is kept.
fn preserve_removed_comment(comment: &str, out: &mut String) {
    if comment.starts_with("//") {
        if comment.ends_with("\r\n") {
            out.push_str("\r\n");
        } else if comment.ends_with('\n') {
            out.push('\n');
        }
        return;
    }

    let mut prev_was_cr = false;
    for byte in comment.bytes() {
        if byte == b'\n' {
            out.push_str(if prev_was_cr { "\r\n" } else { "\n" });
        }
        prev_was_cr = byte == b'\r';
    }
}
307
308fn process_file(path: &Path, cfg: &Config) -> Result<bool> {
309 let original =
310 fs::read_to_string(path).with_context(|| format!("failed to read {}", path.display()))?;
311 let stripped = strip_comments(&original, cfg.strip_doc_comments)
312 .with_context(|| format!("failed to strip comments in {}", path.display()))?;
313
314 if stripped == original {
315 return Ok(false);
316 }
317
318 if cfg.verbose || cfg.check {
319 println!("{}", path.display());
320 }
321
322 if !cfg.check {
323 if !cfg.no_backup {
324 let backup = backup_path(path, &cfg.backup_suffix)?;
325 fs::copy(path, &backup).with_context(|| {
326 format!(
327 "failed to create backup {} -> {}",
328 path.display(),
329 backup.display()
330 )
331 })?;
332 }
333 fs::write(path, stripped).with_context(|| format!("failed to write {}", path.display()))?;
334 }
335
336 Ok(true)
337}
338
339fn backup_path(path: &Path, suffix: &str) -> Result<PathBuf> {
340 let file_name = path
341 .file_name()
342 .and_then(|s| s.to_str())
343 .context("invalid UTF-8 file name")?;
344 Ok(path.with_file_name(format!("{file_name}{suffix}")))
345}
346
347fn original_path(path: &Path, suffix: &str) -> Result<PathBuf> {
351 let file_name = path
352 .file_name()
353 .and_then(|s| s.to_str())
354 .context("invalid UTF-8 file name")?;
355 let stem = file_name
356 .strip_suffix(suffix)
357 .with_context(|| format!("{file_name} does not end with backup suffix {suffix}"))?;
358 Ok(path.with_file_name(stem))
359}
360
361fn is_hidden(entry: &DirEntry) -> bool {
362 entry
363 .file_name()
364 .to_str()
365 .map(|s| s.starts_with('.'))
366 .unwrap_or(false)
367}
368
369fn should_skip_dir(entry: &DirEntry, hidden: bool, excluded: &[String]) -> bool {
370 if !entry.file_type().is_dir() {
371 return false;
372 }
373 if entry.depth() > 0 && !hidden && is_hidden(entry) {
374 return true;
375 }
376 let name = entry.file_name().to_string_lossy();
377 excluded.iter().any(|x| x == &name)
378}
379
/// Reports whether `path` has exactly the extension `rs` (case-sensitive).
fn is_rust_file(path: &Path) -> bool {
    matches!(path.extension(), Some(ext) if ext == "rs")
}
383
384fn build_include_set(globs: &[String]) -> Result<Option<GlobSet>> {
389 if globs.is_empty() {
390 return Ok(None);
391 }
392 let mut builder = GlobSetBuilder::new();
393 for pattern in globs {
394 let glob = GlobBuilder::new(pattern)
395 .literal_separator(true)
396 .build()
397 .with_context(|| format!("invalid include glob: {pattern}"))?;
398 builder.add(glob);
399 }
400 Ok(Some(
401 builder.build().context("failed to compile include globs")?,
402 ))
403}
404
405fn is_included(path: &Path, root: &Path, include: &Option<GlobSet>) -> bool {
412 match include {
413 None => true,
414 Some(set) => {
415 let rel = path.strip_prefix(root).unwrap_or(path);
416 set.is_match(rel)
417 || set.is_match(path)
418 || fs::canonicalize(path)
419 .map(|abs| set.is_match(abs))
420 .unwrap_or(false)
421 }
422 }
423}
424
/// Returns the key used to detect a file reached through more than one path:
/// the canonical path when it can be resolved, otherwise `path` unchanged.
fn dedup_key(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
431
/// Reports whether the file name of `path` looks like a backup: a non-empty
/// original name followed by `suffix`.
///
/// A file whose whole name equals `suffix` is not a backup, because there is
/// no original name to restore it to. An empty `suffix` never matches, and
/// neither does a name that is not valid UTF-8.
fn is_backup_file(path: &Path, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    path.file_name()
        .and_then(|s| s.to_str())
        .and_then(|name| name.strip_suffix(suffix))
        .map_or(false, |stem| !stem.is_empty())
}
438
// Unit tests for the comment stripper and for file selection in `run`.
#[cfg(test)]
mod tests {
    use super::*;

    // Doc comments survive the default mode; ordinary comments do not.
    #[test]
    fn keeps_doc_comments_and_removes_normal_comments() {
        let src = "/// docs\nfn main() { // kill\n let x = 1; /* gone */\n}\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert!(out.contains("/// docs"));
        assert!(!out.contains("kill"));
        assert!(!out.contains("gone"));
    }

    // With `strip_docs` set, outer and inner doc comments are removed too.
    #[test]
    fn strips_doc_comments_when_requested() {
        let src = "/// docs\nfn main() { // kill\n let x = 1; /* gone */\n}\n/*! crate */\n";
        let out = strip_comments(src, true).unwrap();
        assert!(!out.contains("docs"));
        assert!(!out.contains("crate"));
        assert!(!out.contains("kill"));
        assert!(!out.contains("gone"));

        // Comment-only lines disappear entirely instead of leaving blanks.
        assert_eq!(out, "fn main() {\n let x = 1;\n}\n");
    }

    // A line holding only a comment is removed together with its line break.
    #[test]
    fn removes_blank_line_left_by_standalone_comment() {
        let src = "fn main() {\n // explain\n let x = 1;\n}\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(out, "fn main() {\n let x = 1;\n}\n");
    }

    // Only the comment's own line break is swallowed; a blank line that was
    // already in the source stays.
    #[test]
    fn keeps_originally_blank_line_after_standalone_comment() {
        let src = "// header\n\nfn main() {}\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(out, "\nfn main() {}\n");
    }

    // A multi-line block comment standing alone is removed as a whole.
    #[test]
    fn removes_standalone_block_comment_line() {
        let src = "fn main() {\n /* a\n b */\n let x = 1;\n}\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(out, "fn main() {\n let x = 1;\n}\n");
    }

    // Comment-like text inside a raw string literal is not touched.
    #[test]
    fn keeps_raw_string_comment_like_text() {
        let src = "fn main() { let s = r#\"// not a comment /* no */\"#; }\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(src, out);
    }

    // Block-style doc comments (`/** */` and `/*! */`) are kept in default mode.
    #[test]
    fn keeps_block_doc_comments() {
        let src = "/** docs */\nfn main() {}\n/*! crate docs */\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(src, out);
    }

    // An inline multi-line block comment leaves its line breaks behind.
    #[test]
    fn preserves_line_count_for_block_comments() {
        let src = "fn main() { /* a\n b\n c */ let x = 1; }\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(src.lines().count(), out.lines().count());
    }

    // The shebang line is copied through unchanged.
    #[test]
    fn preserves_shebang() {
        let src = "#!/usr/bin/env rust-script\n// hi\nfn main() {}\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert!(out.starts_with("#!/usr/bin/env rust-script\n"));
    }

    // Whitespace that preceded a removed comment is cleaned up.
    #[test]
    fn no_trailing_whitespace_left_after_removing_comments() {
        let src = "let x = 1; // foo\nlet y = 2; /* bar */ \n // indented\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(out, "let x = 1;\nlet y = 2;\n");
        for line in out.lines() {
            assert_eq!(
                line,
                line.trim_end(),
                "line has trailing whitespace: {line:?}"
            );
        }
    }

    // Trailing whitespace inside a string literal is part of the program and
    // must not be stripped.
    #[test]
    fn keeps_trailing_whitespace_inside_string_literal() {
        let src = "let s = \"foo \nbar\"; // gone\n";
        let out = strip_non_doc_comments(src).unwrap();
        assert_eq!(out, "let s = \"foo \nbar\";\n");
    }

    // Builds a write-mode, no-backup configuration for a single root.
    fn cfg_for(root: &Path, include: Vec<String>, exclude: Vec<String>) -> Config {
        Config {
            roots: vec![root.to_path_buf()],
            check: false,
            verbose: false,
            hidden: false,
            follow_links: false,
            no_backup: true,
            strip_doc_comments: false,
            backup_suffix: ".bak".into(),
            exclude_dirs: exclude,
            include_globs: include,
        }
    }

    // Minimal source containing one strippable comment.
    const COMMENTED: &str = "fn f() { // strip me\n}\n";

    // A root-relative include glob restricts which files are rewritten.
    #[test]
    fn include_globs_limit_processed_files() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::create_dir_all(root.join("examples")).unwrap();
        fs::write(root.join("src/a.rs"), COMMENTED).unwrap();
        fs::write(root.join("examples/b.rs"), COMMENTED).unwrap();

        let cfg = cfg_for(root, vec!["src/**/*.rs".into()], vec![]);
        let stats = run(&cfg).unwrap();

        assert_eq!(stats.files_seen, 1);
        assert_eq!(stats.files_changed, 1);
        assert!(!fs::read_to_string(root.join("src/a.rs"))
            .unwrap()
            .contains("strip me"));

        // The file outside the include pattern is untouched.
        assert_eq!(
            fs::read_to_string(root.join("examples/b.rs")).unwrap(),
            COMMENTED
        );
    }

    // No include globs means every Rust file is processed.
    #[test]
    fn empty_include_processes_everything() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        fs::write(root.join("a.rs"), COMMENTED).unwrap();

        let cfg = cfg_for(root, vec![], vec![]);
        let stats = run(&cfg).unwrap();
        assert_eq!(stats.files_seen, 1);
        assert_eq!(stats.files_changed, 1);
    }

    // An include pattern may be the canonical absolute path of a file.
    #[test]
    fn include_matches_absolute_path() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/a.rs"), COMMENTED).unwrap();
        fs::write(root.join("src/b.rs"), COMMENTED).unwrap();

        let target = fs::canonicalize(root.join("src/a.rs")).unwrap();
        let cfg = cfg_for(root, vec![target.to_string_lossy().into_owned()], vec![]);
        let stats = run(&cfg).unwrap();

        assert_eq!(stats.files_seen, 1);
        assert_eq!(stats.files_changed, 1);
        assert!(!fs::read_to_string(root.join("src/a.rs"))
            .unwrap()
            .contains("strip me"));
        assert_eq!(
            fs::read_to_string(root.join("src/b.rs")).unwrap(),
            COMMENTED
        );
    }

    // An excluded directory is never entered, even if its files match an
    // include glob.
    #[test]
    fn exclude_wins_over_include() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        fs::create_dir_all(root.join("vendor")).unwrap();
        fs::write(root.join("vendor/c.rs"), COMMENTED).unwrap();

        let cfg = cfg_for(root, vec!["**/*.rs".into()], vec!["vendor".into()]);
        let stats = run(&cfg).unwrap();

        assert_eq!(stats.files_seen, 0);
        assert_eq!(
            fs::read_to_string(root.join("vendor/c.rs")).unwrap(),
            COMMENTED
        );
    }

    // A malformed pattern is reported with the "invalid include glob" context.
    #[test]
    fn invalid_include_glob_is_reported() {
        let err = build_include_set(&["src/[".into()]).unwrap_err();
        assert!(err.to_string().contains("invalid include glob"));
    }
}