use anyhow::{Context, Result};
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
use ra_ap_rustc_lexer::{strip_shebang, tokenize, FrontmatterAllowed, TokenKind};
use std::collections::HashSet;
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, WalkDir};
/// Options shared by [`run`], [`remove_backups`] and [`restore_backups`].
#[derive(Debug, Clone)]
pub struct Config {
/// Starting points of the walk; each one is handed to `WalkDir::new`.
pub roots: Vec<PathBuf>,
/// Dry-run mode: changed files are reported but never written, and `run`
/// returns an error when at least one file would change.
pub check: bool,
/// Print the path of every file that is changed, removed or restored.
pub verbose: bool,
/// When `false`, directories below a root whose name starts with `.` are
/// not descended into.
pub hidden: bool,
/// Forwarded to `WalkDir::follow_links`.
pub follow_links: bool,
/// When `true`, files are overwritten without first copying them to a backup.
pub no_backup: bool,
/// Passed to `strip_comments` as `strip_docs`: remove doc comments as well.
pub strip_doc_comments: bool,
/// Text appended to a file name to form the name of its backup.
pub backup_suffix: String,
/// Directory names (compared against the final path component) to prune
/// from the walk.
pub exclude_dirs: Vec<String>,
/// Glob patterns selecting which files `run` processes; an empty list
/// selects every Rust file found.
pub include_globs: Vec<String>,
}
/// Counters returned by [`run`], [`remove_backups`] and [`restore_backups`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RunStats {
/// Number of files that passed all filters and were handled.
pub files_seen: usize,
/// Number of files that were rewritten (or, in check mode, would be),
/// removed, or restored, depending on the operation.
pub files_changed: usize,
}
impl Config {
    /// Checks that the configuration can be used for a stripping run.
    ///
    /// # Errors
    ///
    /// Fails when backups are enabled (`no_backup` is `false`) while
    /// `backup_suffix` is empty.
    pub fn validate(&self) -> Result<()> {
        let backups_enabled = !self.no_backup;
        if backups_enabled && self.backup_suffix.is_empty() {
            anyhow::bail!("backup suffix must not be empty");
        }
        Ok(())
    }
}
/// Walks every configured root and strips comments from each selected Rust file.
///
/// A file is selected when it is a regular file with the `rs` extension,
/// matches the include globs (if any), and has not already been handled under
/// another path that resolves to the same canonical location.
///
/// # Errors
///
/// Fails when the configuration is invalid, an include glob does not compile,
/// the directory walk reports an error, a file cannot be processed, or — in
/// check mode — at least one file would change.
pub fn run(cfg: &Config) -> Result<RunStats> {
    cfg.validate()?;
    let include = build_include_set(&cfg.include_globs)?;
    let mut visited = HashSet::new();
    let mut totals = RunStats::default();

    for root in &cfg.roots {
        let entries = WalkDir::new(root)
            .follow_links(cfg.follow_links)
            .into_iter()
            .filter_entry(|candidate| !should_skip_dir(candidate, cfg.hidden, &cfg.exclude_dirs));

        for item in entries {
            let item = item.with_context(|| format!("failed while walking {}", root.display()))?;
            let path = item.path();

            // Cheap checks first; the dedup set is only touched for real candidates.
            if !item.file_type().is_file() || !is_rust_file(path) {
                continue;
            }
            if !is_included(path, root, &include) {
                continue;
            }
            if !visited.insert(dedup_key(path)) {
                continue;
            }

            totals.files_seen += 1;
            if process_file(path, cfg)? {
                totals.files_changed += 1;
            }
        }
    }

    if cfg.check && totals.files_changed > 0 {
        anyhow::bail!("{} file(s) would change", totals.files_changed);
    }
    Ok(totals)
}
/// Deletes every backup file (name ending in `cfg.backup_suffix`) under the roots.
///
/// Each deleted file is counted in both `files_seen` and `files_changed`.
///
/// # Errors
///
/// Fails when the backup suffix is empty, the directory walk reports an error,
/// or a file cannot be removed.
pub fn remove_backups(cfg: &Config) -> Result<RunStats> {
    if cfg.backup_suffix.is_empty() {
        anyhow::bail!("backup suffix must not be empty");
    }
    let mut visited = HashSet::new();
    let mut totals = RunStats::default();

    for root in &cfg.roots {
        let entries = WalkDir::new(root)
            .follow_links(cfg.follow_links)
            .into_iter()
            .filter_entry(|candidate| !should_skip_dir(candidate, cfg.hidden, &cfg.exclude_dirs));

        for item in entries {
            let item = item.with_context(|| format!("failed while walking {}", root.display()))?;
            let is_candidate =
                item.file_type().is_file() && is_backup_file(item.path(), &cfg.backup_suffix);
            // The dedup set is only consulted for files that really are backups.
            if !is_candidate || !visited.insert(dedup_key(item.path())) {
                continue;
            }

            totals.files_seen += 1;
            if cfg.verbose {
                println!("{}", item.path().display());
            }
            fs::remove_file(item.path())
                .with_context(|| format!("failed to remove {}", item.path().display()))?;
            totals.files_changed += 1;
        }
    }
    Ok(totals)
}
/// Renames every backup file under the roots back to its original name.
///
/// The original name is the backup's file name with `cfg.backup_suffix`
/// removed. Each restored file is counted in both `files_seen` and
/// `files_changed`.
///
/// # Errors
///
/// Fails when the backup suffix is empty, the directory walk reports an error,
/// the original name cannot be derived, or the rename fails.
pub fn restore_backups(cfg: &Config) -> Result<RunStats> {
    if cfg.backup_suffix.is_empty() {
        anyhow::bail!("backup suffix must not be empty");
    }
    let mut visited = HashSet::new();
    let mut totals = RunStats::default();

    for root in &cfg.roots {
        let entries = WalkDir::new(root)
            .follow_links(cfg.follow_links)
            .into_iter()
            .filter_entry(|candidate| !should_skip_dir(candidate, cfg.hidden, &cfg.exclude_dirs));

        for item in entries {
            let item = item.with_context(|| format!("failed while walking {}", root.display()))?;
            let backup = item.path();
            let is_candidate =
                item.file_type().is_file() && is_backup_file(backup, &cfg.backup_suffix);
            // The dedup set is only consulted for files that really are backups.
            if !is_candidate || !visited.insert(dedup_key(backup)) {
                continue;
            }

            let original = original_path(backup, &cfg.backup_suffix)?;
            totals.files_seen += 1;
            if cfg.verbose {
                println!("{} -> {}", backup.display(), original.display());
            }
            fs::rename(backup, &original).with_context(|| {
                format!(
                    "failed to restore {} -> {}",
                    backup.display(),
                    original.display()
                )
            })?;
            totals.files_changed += 1;
        }
    }
    Ok(totals)
}
/// Strips ordinary comments from `input` while keeping doc comments.
///
/// Shorthand for [`strip_comments`] with `strip_docs` set to `false`.
///
/// # Errors
///
/// Propagates any error returned by [`strip_comments`].
pub fn strip_non_doc_comments(input: &str) -> Result<String> {
    let strip_docs = false;
    strip_comments(input, strip_docs)
}
/// Removes comments from Rust source text and returns the rewritten text.
///
/// Doc comments are kept unless `strip_docs` is `true`. Kept doc comments,
/// literals and frontmatter are copied verbatim and recorded as protected
/// byte ranges, so the final trailing-whitespace pass leaves their contents
/// alone. A shebang prefix, when present, is copied to the output before
/// tokenization starts.
///
/// A removed comment that is the first thing on its line (only spaces or tabs
/// precede it) takes the line's indentation with it, and one newline is
/// dropped from the token that follows. Any other removed comment is replaced
/// by whatever `preserve_removed_comment` emits for it.
///
/// # Errors
///
/// Returns an error if the token lengths reported by the lexer do not add up
/// to the length of the tokenized text.
pub fn strip_comments(input: &str, strip_docs: bool) -> Result<String> {
let mut output = String::with_capacity(input.len());
// Byte ranges of `output` that the trailing-whitespace pass must not touch.
let mut protected: Vec<(usize, usize)> = Vec::new();
let mut offset = 0usize;
// Copy a shebang prefix through unchanged and tokenize only what follows it.
if let Some(shebang_len) = strip_shebang(input) {
output.push_str(&input[..shebang_len]);
offset = shebang_len;
}
let rest = &input[offset..];
// Byte position in `rest` of the token currently being handled.
let mut pos = 0usize;
// Set when a whole comment line was removed; the next token should lose one
// leading newline so no empty line is left behind.
let mut swallow_newline = false;
for token in tokenize(rest, FrontmatterAllowed::Yes) {
let len = token.len as usize;
let end = pos + len;
let text = &rest[pos..end];
pos = end;
// The flag applies to exactly one following token, so read it and reset it
// before dispatching on the token kind.
let swallow = swallow_newline;
swallow_newline = false;
match token.kind {
TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
if doc_style.is_some() && !strip_docs {
// Doc comment that must be kept: copy verbatim and protect it.
push_protected(&mut output, &mut protected, text);
} else if let Some(line_start) = blank_line_start(&output) {
// The comment is the first thing on its line: drop the indentation
// and ask for the following newline to be swallowed.
// NOTE(review): if code follows the comment on the same line, the
// truncated indentation is not restored — confirm this is intended.
output.truncate(line_start);
swallow_newline = true;
} else {
// Comment follows other content on the line; keep only its line breaks.
preserve_removed_comment(text, &mut output);
}
}
TokenKind::Literal { .. } | TokenKind::Frontmatter { .. } => {
push_protected(&mut output, &mut protected, text);
}
_ => {
let text = if swallow {
strip_one_leading_newline(text)
} else {
text
};
output.push_str(text);
}
}
}
// Sanity check: the token lengths must cover the tokenized text exactly.
anyhow::ensure!(pos == rest.len(), "lexer did not consume full input");
Ok(strip_trailing_whitespace(&output, &protected))
}
/// Appends `text` to `output` and records the byte range it occupies there.
///
/// The recorded `(start, end)` pair is half-open and is appended to
/// `protected`.
fn push_protected(output: &mut String, protected: &mut Vec<(usize, usize)>, text: &str) {
    let begin = output.len();
    let span = (begin, begin + text.len());
    output.push_str(text);
    protected.push(span);
}
/// Returns `output` with runs of spaces and tabs removed wherever such a run
/// is immediately followed by `\n`, `\r`, or the end of the text.
///
/// Bytes that fall inside any half-open range in `protected` are never
/// removed and also terminate a removable run. The ranges are looked up with
/// a binary search on their start offsets, so `protected` is expected to be
/// ordered by start.
fn strip_trailing_whitespace(output: &str, protected: &[(usize, usize)]) -> String {
    let raw = output.as_bytes();
    let total = raw.len();

    // A byte may be removed only if it is a space/tab outside every protected range.
    let removable = |at: usize| {
        if raw[at] != b' ' && raw[at] != b'\t' {
            return false;
        }
        let following = protected.partition_point(|&(begin, _)| begin <= at);
        following == 0 || protected[following - 1].1 <= at
    };

    let mut cleaned = String::with_capacity(total);
    // Start of the slice of `output` that has not been copied to `cleaned` yet.
    let mut pending_from = 0usize;
    let mut cursor = 0usize;
    while cursor < total {
        if !removable(cursor) {
            cursor += 1;
            continue;
        }
        let run_end = (cursor..total).find(|&k| !removable(k)).unwrap_or(total);
        let at_line_end = run_end == total || raw[run_end] == b'\n' || raw[run_end] == b'\r';
        if at_line_end {
            // Flush everything before the run and skip over the run itself.
            cleaned.push_str(&output[pending_from..cursor]);
            pending_from = run_end;
        }
        cursor = run_end;
    }
    cleaned.push_str(&output[pending_from..]);
    cleaned
}
/// Returns the byte offset where the last line of `output` begins, provided
/// that line holds nothing but spaces and tabs (an empty line qualifies).
///
/// Returns `None` as soon as the last line contains any other byte.
fn blank_line_start(output: &str) -> Option<usize> {
    let line_begin = match output.rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    let has_content = output.as_bytes()[line_begin..]
        .iter()
        .any(|&byte| byte != b' ' && byte != b'\t');
    if has_content {
        None
    } else {
        Some(line_begin)
    }
}
/// Drops a single leading line break (`\r\n` or `\n`) from `text`, if present.
///
/// Text that does not start with a line break is returned unchanged.
fn strip_one_leading_newline(text: &str) -> &str {
    if let Some(after_crlf) = text.strip_prefix("\r\n") {
        return after_crlf;
    }
    match text.strip_prefix('\n') {
        Some(after_lf) => after_lf,
        None => text,
    }
}
/// Emits the line breaks of a removed comment so the surrounding code keeps
/// its line structure and its line-ending style.
///
/// * Line comment (`//…`): if the comment text itself ends with a line
///   terminator (`\r\n`, `\n`, or a lone `\r` such as the one left in front of
///   the `\n` of a CRLF file), that terminator is written to `out`; otherwise
///   nothing is written.
/// * Block comment: one line break is written for every `\n` inside the
///   comment. A `\n` that is preceded by `\r` is written as `\r\n`, so CRLF
///   input does not end up with bare-LF lines.
fn preserve_removed_comment(comment: &str, out: &mut String) {
    if comment.starts_with("//") {
        // Longest terminator first so `\r\n` is not mistaken for a lone `\n`.
        for terminator in ["\r\n", "\n", "\r"] {
            if comment.ends_with(terminator) {
                out.push_str(terminator);
                break;
            }
        }
        return;
    }
    for (newline_at, _) in comment.match_indices('\n') {
        // Keep the carriage return of a CRLF pair together with its line feed.
        if comment[..newline_at].ends_with('\r') {
            out.push('\r');
        }
        out.push('\n');
    }
}
/// Strips comments from the file at `path` and reports whether its content
/// differs from what is on disk.
///
/// Returns `Ok(false)` when stripping leaves the text unchanged. Otherwise the
/// path is printed (in verbose or check mode) and, unless check mode is on,
/// the file is overwritten — after first being copied to its backup path
/// unless `cfg.no_backup` is set — and `Ok(true)` is returned.
///
/// # Errors
///
/// Fails when the file cannot be read, stripped, backed up, or written.
fn process_file(path: &Path, cfg: &Config) -> Result<bool> {
    let before =
        fs::read_to_string(path).with_context(|| format!("failed to read {}", path.display()))?;
    let after = strip_comments(&before, cfg.strip_doc_comments)
        .with_context(|| format!("failed to strip comments in {}", path.display()))?;
    if after == before {
        return Ok(false);
    }

    if cfg.verbose || cfg.check {
        println!("{}", path.display());
    }
    // Check mode only reports; nothing on disk is touched.
    if cfg.check {
        return Ok(true);
    }

    if !cfg.no_backup {
        let backup = backup_path(path, &cfg.backup_suffix)?;
        fs::copy(path, &backup).with_context(|| {
            format!(
                "failed to create backup {} -> {}",
                path.display(),
                backup.display()
            )
        })?;
    }
    fs::write(path, after).with_context(|| format!("failed to write {}", path.display()))?;
    Ok(true)
}
/// Builds the backup path for `path`: the same location with `suffix`
/// appended to the file name.
///
/// # Errors
///
/// Fails when `path` has no file name or the file name is not valid UTF-8.
fn backup_path(path: &Path, suffix: &str) -> Result<PathBuf> {
    let name = path
        .file_name()
        .and_then(|raw| raw.to_str())
        .context("invalid UTF-8 file name")?;
    let mut backup_name = String::with_capacity(name.len() + suffix.len());
    backup_name.push_str(name);
    backup_name.push_str(suffix);
    Ok(path.with_file_name(backup_name))
}
/// Derives the path a backup file was made from by removing `suffix` from the
/// end of its file name.
///
/// # Errors
///
/// Fails when `path` has no file name, the file name is not valid UTF-8, or
/// the file name does not end with `suffix`.
fn original_path(path: &Path, suffix: &str) -> Result<PathBuf> {
    let name = path
        .file_name()
        .and_then(|raw| raw.to_str())
        .context("invalid UTF-8 file name")?;
    let original_name = name
        .strip_suffix(suffix)
        .with_context(|| format!("{name} does not end with backup suffix {suffix}"))?;
    let restored = path.with_file_name(original_name);
    Ok(restored)
}
/// Reports whether the entry's file name starts with a dot.
///
/// A file name that is not valid UTF-8 is treated as not hidden.
fn is_hidden(entry: &DirEntry) -> bool {
    matches!(entry.file_name().to_str(), Some(name) if name.starts_with('.'))
}
/// Decides whether the walk should prune `entry`.
///
/// Only directories are ever pruned. A directory is pruned when it is hidden
/// (and lies below the walk root, and `hidden` is `false`) or when its name
/// equals one of the `excluded` names.
fn should_skip_dir(entry: &DirEntry, hidden: bool, excluded: &[String]) -> bool {
    if !entry.file_type().is_dir() {
        return false;
    }
    // The root itself (depth 0) is never rejected for being hidden.
    let below_root = entry.depth() > 0;
    if below_root && !hidden && is_hidden(entry) {
        return true;
    }
    let dir_name = entry.file_name().to_string_lossy();
    excluded
        .iter()
        .any(|candidate| candidate.as_str() == &*dir_name)
}
/// Reports whether `path` has the extension `rs` (compared case-sensitively).
fn is_rust_file(path: &Path) -> bool {
    match path.extension() {
        Some(ext) => ext == "rs",
        None => false,
    }
}
/// Compiles the include patterns into a single glob set.
///
/// Returns `Ok(None)` when `globs` is empty. Every pattern is built with
/// `literal_separator(true)`.
///
/// # Errors
///
/// Fails when a pattern is not a valid glob or the combined set cannot be
/// compiled.
fn build_include_set(globs: &[String]) -> Result<Option<GlobSet>> {
    if globs.is_empty() {
        return Ok(None);
    }
    let mut set_builder = GlobSetBuilder::new();
    globs.iter().try_for_each(|raw| -> Result<()> {
        let compiled = GlobBuilder::new(raw)
            .literal_separator(true)
            .build()
            .with_context(|| format!("invalid include glob: {raw}"))?;
        set_builder.add(compiled);
        Ok(())
    })?;
    let compiled_set = set_builder
        .build()
        .context("failed to compile include globs")?;
    Ok(Some(compiled_set))
}
/// Reports whether `path` is selected by the include set.
///
/// With no include set every path is selected. Otherwise the path is tried in
/// three forms, in order: relative to `root` (or unchanged when it is not
/// under `root`), exactly as given, and canonicalized. A path that cannot be
/// canonicalized simply fails that last attempt.
fn is_included(path: &Path, root: &Path, include: &Option<GlobSet>) -> bool {
    let set = match include {
        Some(set) => set,
        None => return true,
    };
    let relative = path.strip_prefix(root).unwrap_or(path);
    if set.is_match(relative) || set.is_match(path) {
        return true;
    }
    match fs::canonicalize(path) {
        Ok(absolute) => set.is_match(absolute),
        Err(_) => false,
    }
}
/// Returns the key used to recognise a file that is reached more than once:
/// the canonicalized path, or `path` itself when canonicalization fails.
fn dedup_key(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
/// Reports whether the file name of `path` looks like a backup: it ends with
/// `suffix` and has at least one character in front of it.
///
/// Returns `false` when `suffix` is empty (otherwise every file would match),
/// when the whole file name equals `suffix` (there is no original name to go
/// back to), and when the file name is missing or not valid UTF-8.
fn is_backup_file(path: &Path, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    path.file_name()
        .and_then(|raw| raw.to_str())
        .and_then(|name| name.strip_suffix(suffix))
        .map_or(false, |stem| !stem.is_empty())
}
// Unit tests: the first group exercises the pure text transformation, the
// second group runs `run` against files in a temporary directory.
#[cfg(test)]
mod tests {
use super::*;
// --- Text-level behaviour of `strip_comments` / `strip_non_doc_comments` ---
#[test]
fn keeps_doc_comments_and_removes_normal_comments() {
let src = "/// docs\nfn main() { // kill\n let x = 1; /* gone */\n}\n";
let out = strip_non_doc_comments(src).unwrap();
assert!(out.contains("/// docs"));
assert!(!out.contains("kill"));
assert!(!out.contains("gone"));
}
#[test]
fn strips_doc_comments_when_requested() {
let src = "/// docs\nfn main() { // kill\n let x = 1; /* gone */\n}\n/*! crate */\n";
let out = strip_comments(src, true).unwrap();
assert!(!out.contains("docs"));
assert!(!out.contains("crate"));
assert!(!out.contains("kill"));
assert!(!out.contains("gone"));
assert_eq!(out, "fn main() {\n let x = 1;\n}\n");
}
// A comment alone on its line must take the whole line with it.
#[test]
fn removes_blank_line_left_by_standalone_comment() {
let src = "fn main() {\n // explain\n let x = 1;\n}\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(out, "fn main() {\n let x = 1;\n}\n");
}
// Only the comment's own line is removed; a blank line that was already in
// the source stays.
#[test]
fn keeps_originally_blank_line_after_standalone_comment() {
let src = "// header\n\nfn main() {}\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(out, "\nfn main() {}\n");
}
#[test]
fn removes_standalone_block_comment_line() {
let src = "fn main() {\n /* a\n b */\n let x = 1;\n}\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(out, "fn main() {\n let x = 1;\n}\n");
}
// Comment-like text inside a raw string literal is not a comment.
#[test]
fn keeps_raw_string_comment_like_text() {
let src = "fn main() { let s = r#\"// not a comment /* no */\"#; }\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(src, out);
}
#[test]
fn keeps_block_doc_comments() {
let src = "/** docs */\nfn main() {}\n/*! crate docs */\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(src, out);
}
// A block comment that follows code on its line keeps its line breaks so the
// line count does not change.
#[test]
fn preserves_line_count_for_block_comments() {
let src = "fn main() { /* a\n b\n c */ let x = 1; }\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(src.lines().count(), out.lines().count());
}
#[test]
fn preserves_shebang() {
let src = "#!/usr/bin/env rust-script\n// hi\nfn main() {}\n";
let out = strip_non_doc_comments(src).unwrap();
assert!(out.starts_with("#!/usr/bin/env rust-script\n"));
}
#[test]
fn no_trailing_whitespace_left_after_removing_comments() {
let src = "let x = 1; // foo\nlet y = 2; /* bar */ \n // indented\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(out, "let x = 1;\nlet y = 2;\n");
for line in out.lines() {
assert_eq!(
line,
line.trim_end(),
"line has trailing whitespace: {line:?}"
);
}
}
// Whitespace before a line break inside a string literal is part of the
// literal's value and must survive the trailing-whitespace pass.
#[test]
fn keeps_trailing_whitespace_inside_string_literal() {
let src = "let s = \"foo \nbar\"; // gone\n";
let out = strip_non_doc_comments(src).unwrap();
assert_eq!(out, "let s = \"foo \nbar\";\n");
}
// --- File-system behaviour of `run` ---
// Builds a configuration rooted at `root` that writes in place without
// backups, with the given include globs and excluded directory names.
fn cfg_for(root: &Path, include: Vec<String>, exclude: Vec<String>) -> Config {
Config {
roots: vec![root.to_path_buf()],
check: false,
verbose: false,
hidden: false,
follow_links: false,
no_backup: true,
strip_doc_comments: false,
backup_suffix: ".bak".into(),
exclude_dirs: exclude,
include_globs: include,
}
}
// Fixture source containing one removable comment.
const COMMENTED: &str = "fn f() { // strip me\n}\n";
#[test]
fn include_globs_limit_processed_files() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::create_dir_all(root.join("examples")).unwrap();
fs::write(root.join("src/a.rs"), COMMENTED).unwrap();
fs::write(root.join("examples/b.rs"), COMMENTED).unwrap();
let cfg = cfg_for(root, vec!["src/**/*.rs".into()], vec![]);
let stats = run(&cfg).unwrap();
assert_eq!(stats.files_seen, 1);
assert_eq!(stats.files_changed, 1);
assert!(!fs::read_to_string(root.join("src/a.rs"))
.unwrap()
.contains("strip me"));
assert_eq!(
fs::read_to_string(root.join("examples/b.rs")).unwrap(),
COMMENTED
);
}
#[test]
fn empty_include_processes_everything() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
fs::write(root.join("a.rs"), COMMENTED).unwrap();
let cfg = cfg_for(root, vec![], vec![]);
let stats = run(&cfg).unwrap();
assert_eq!(stats.files_seen, 1);
assert_eq!(stats.files_changed, 1);
}
// An include pattern given as a canonical absolute path selects exactly
// that file.
#[test]
fn include_matches_absolute_path() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::write(root.join("src/a.rs"), COMMENTED).unwrap();
fs::write(root.join("src/b.rs"), COMMENTED).unwrap();
let target = fs::canonicalize(root.join("src/a.rs")).unwrap();
let cfg = cfg_for(root, vec![target.to_string_lossy().into_owned()], vec![]);
let stats = run(&cfg).unwrap();
assert_eq!(stats.files_seen, 1);
assert_eq!(stats.files_changed, 1);
assert!(!fs::read_to_string(root.join("src/a.rs"))
.unwrap()
.contains("strip me"));
assert_eq!(
fs::read_to_string(root.join("src/b.rs")).unwrap(),
COMMENTED
);
}
// An excluded directory is pruned from the walk, so files inside it are
// never seen even when an include glob would match them.
#[test]
fn exclude_wins_over_include() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("vendor")).unwrap();
fs::write(root.join("vendor/c.rs"), COMMENTED).unwrap();
let cfg = cfg_for(root, vec!["**/*.rs".into()], vec!["vendor".into()]);
let stats = run(&cfg).unwrap();
assert_eq!(stats.files_seen, 0);
assert_eq!(
fs::read_to_string(root.join("vendor/c.rs")).unwrap(),
COMMENTED
);
}
#[test]
fn invalid_include_glob_is_reported() {
let err = build_include_set(&["src/[".into()]).unwrap_err();
assert!(err.to_string().contains("invalid include glob"));
}
}