use std::collections::HashSet;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use globset::{Glob, GlobSet, GlobSetBuilder};
use ignore::WalkBuilder;
use crate::loc::language::{LanguageSpec, detect, detect_by_shebang};
/// Rules deciding which files and directories are left out of a walk.
///
/// Extension rules are stored lowercased and without a leading dot (see
/// `ExcludeFilter::new`). The `Default` value has no rules at all.
#[derive(Clone, Debug, Default)]
pub struct ExcludeFilter {
/// Allow-list of extensions. When `Some`, a file is excluded unless its
/// extension is in the set (files without an extension are excluded too),
/// and the `extensions` deny-list is not consulted.
include_extensions: Option<HashSet<Box<str>>>,
/// Deny-list of extensions; only consulted when `include_extensions` is `None`.
extensions: HashSet<Box<str>>,
/// Directory names to skip; compared for exact equality with an entry's name.
dirs: HashSet<Box<str>>,
/// Compiled glob patterns, matched against the path relative to the walk root.
/// `None` when no patterns were supplied or the set failed to compile.
globs: Option<GlobSet>,
}
impl ExcludeFilter {
pub fn new(
include_extensions: &[String],
extensions: &[String],
dirs: &[String],
globs: &[String],
) -> Self {
let normalize_exts = |exts: &[String]| -> HashSet<Box<str>> {
exts.iter()
.map(|e| e.trim_start_matches('.').to_lowercase().into_boxed_str())
.filter(|e| !e.is_empty())
.collect()
};
let include_extensions = if include_extensions.is_empty() {
None
} else {
Some(normalize_exts(include_extensions))
};
let extensions = normalize_exts(extensions);
let dirs = dirs.iter().map(|d| d.clone().into_boxed_str()).collect();
let globs = Self::build_glob_set(globs);
Self {
include_extensions,
extensions,
dirs,
globs,
}
}
#[cfg(test)]
pub fn is_empty(&self) -> bool {
self.include_extensions.is_none()
&& self.extensions.is_empty()
&& self.dirs.is_empty()
&& self.globs.is_none()
}
fn build_glob_set(patterns: &[String]) -> Option<GlobSet> {
if patterns.is_empty() {
return None;
}
let mut builder = GlobSetBuilder::new();
for pattern in patterns {
match Glob::new(pattern) {
Ok(g) => {
builder.add(g);
}
Err(e) => eprintln!("warning: invalid glob '{pattern}': {e}"),
}
}
match builder.build() {
Ok(set) => Some(set),
Err(e) => {
eprintln!("warning: failed to compile glob set: {e}");
None
}
}
}
pub fn excludes_dir(&self, name: &str) -> bool {
self.dirs.contains(name)
}
pub fn excludes_file(&self, path: &Path, walk_root: &Path) -> bool {
self.excludes_by_extension(path) || self.excludes_by_glob(path, walk_root)
}
fn excludes_by_extension(&self, path: &Path) -> bool {
let ext = match path.extension().and_then(|e| e.to_str()) {
Some(e) => e,
None => {
return self.include_extensions.is_some();
}
};
if let Some(ref include) = self.include_extensions {
return !include.iter().any(|inc| inc.eq_ignore_ascii_case(ext));
}
!self.extensions.is_empty()
&& self
.extensions
.iter()
.any(|excl| excl.eq_ignore_ascii_case(ext))
}
fn excludes_by_glob(&self, path: &Path, walk_root: &Path) -> bool {
self.globs.as_ref().is_some_and(|g| {
let relative = path.strip_prefix(walk_root).unwrap_or(path);
g.is_match(relative)
})
}
}
/// Borrowed bundle of the settings needed to walk a source tree.
#[derive(Debug)]
pub struct WalkConfig<'a> {
/// Root path handed to the walking functions.
pub path: &'a Path,
/// When `false`, test files and test directories are left out of the walk.
pub include_tests: bool,
/// Exclusion rules applied during the walk.
pub filter: &'a ExcludeFilter,
}
impl<'a> WalkConfig<'a> {
pub fn new(path: &'a Path, include_tests: bool, filter: &'a ExcludeFilter) -> Self {
Self {
path,
include_tests,
filter,
}
}
pub fn exclude_tests(&self) -> bool {
!self.include_tests
}
pub fn source_files(&self) -> Vec<(PathBuf, &'static LanguageSpec)> {
source_files(self.path, self.exclude_tests(), self.filter)
}
pub fn collect_analysis<T>(
&self,
f: impl Fn(&Path, &LanguageSpec) -> Result<Option<T>, Box<dyn std::error::Error>>,
) -> Vec<T> {
collect_analysis(self.path, self.exclude_tests(), self.filter, f)
}
}
/// Directory names treated as test directories; `walk` skips them when test
/// exclusion is enabled.
pub const TEST_DIRS: &[&str] = &["tests", "test", "__tests__", "spec"];
/// File-naming convention that marks test files for a group of extensions.
struct TestPattern {
    /// Extensions (without the dot, case-sensitive) the convention applies to.
    exts: &'static [&'static str],
    /// Endings of the file name, last extension removed, that mark a test.
    suffixes: &'static [&'static str],
    /// Beginnings of the file name that mark a test.
    prefixes: &'static [&'static str],
}

/// Test-file naming conventions, grouped by file extension.
#[rustfmt::skip]
const TEST_PATTERNS: &[TestPattern] = &[
    TestPattern { exts: &["rs", "go", "exs", "dart"], suffixes: &["_test"], prefixes: &[] },
    TestPattern { exts: &["py"], suffixes: &["_test"], prefixes: &["test_"] },
    TestPattern { exts: &["rb"], suffixes: &["_test", "_spec"], prefixes: &[] },
    TestPattern { exts: &["php"], suffixes: &["Test", "_test"], prefixes: &[] },
    TestPattern {
        exts: &["js", "jsx", "mjs", "cjs", "ts", "tsx", "mts", "cts"],
        suffixes: &[".test", ".spec"],
        prefixes: &[],
    },
    TestPattern {
        exts: &["java", "kt", "kts", "cs", "swift"],
        suffixes: &["Test", "Tests"],
        prefixes: &[],
    },
    TestPattern { exts: &["scala"], suffixes: &["Test", "Spec"], prefixes: &[] },
    TestPattern { exts: &["c"], suffixes: &["_test", "_unittest"], prefixes: &["test_"] },
    TestPattern {
        exts: &["cc", "cpp", "cxx"],
        suffixes: &["_test", "_unittest", "Test"],
        prefixes: &["test_"],
    },
    TestPattern { exts: &["hs"], suffixes: &["Test", "Spec"], prefixes: &[] },
];

/// Returns `true` when the file name of `path` follows a test-naming
/// convention from `TEST_PATTERNS` for its extension.
///
/// The name is split at its last `.`; names without a dot, or that are not
/// valid UTF-8, are never test files. Matching is case-sensitive.
pub fn is_test_file(path: &Path) -> bool {
    let Some((stem, extension)) = path
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(|name| name.rsplit_once('.'))
    else {
        return false;
    };

    TEST_PATTERNS
        .iter()
        .filter(|pattern| pattern.exts.contains(&extension))
        .any(|pattern| {
            pattern.suffixes.iter().any(|suffix| stem.ends_with(suffix))
                || pattern.prefixes.iter().any(|prefix| stem.starts_with(prefix))
        })
}
/// Tries to identify the language of `path` from its first line.
///
/// Opens the file, reads its first line and hands it to `detect_by_shebang`.
/// Returns `None` when the file cannot be opened, when the bytes read are not
/// valid UTF-8, or when `detect_by_shebang` does not recognise the line.
///
/// At most `MAX_SHEBANG_BYTES` bytes are read. Without the cap, `read_line`
/// buffers everything up to the first `\n`, which for a large binary file
/// without newlines means the whole file.
pub fn try_detect_shebang(path: &Path) -> Option<&'static LanguageSpec> {
    /// Upper bound on the bytes inspected for a first line.
    const MAX_SHEBANG_BYTES: u64 = 512;

    let file = File::open(path).ok()?;
    // Fully qualified so the `Read` trait does not have to be imported.
    let limited = std::io::Read::take(file, MAX_SHEBANG_BYTES);
    let mut reader = BufReader::new(limited);
    let mut first_line = String::new();
    reader.read_line(&mut first_line).ok()?;
    detect_by_shebang(&first_line)
}
/// Walks `path` and returns every regular file whose language can be
/// determined, paired with its language spec.
///
/// The language is looked up with `detect` first and, failing that, with
/// `try_detect_shebang`. Walk errors are reported on stderr and skipped.
pub fn source_files(
    path: &Path,
    exclude_tests: bool,
    filter: &ExcludeFilter,
) -> Vec<(PathBuf, &'static LanguageSpec)> {
    walk(path, exclude_tests, filter)
        .filter_map(|walked| match walked {
            Ok(entry) => Some(entry),
            Err(err) => {
                eprintln!("warning: {err}");
                None
            }
        })
        .filter(|entry| entry.file_type().is_some_and(|kind| kind.is_file()))
        .filter_map(|entry| {
            let candidate = entry.path();
            let spec = detect(candidate).or_else(|| try_detect_shebang(candidate))?;
            Some((candidate.to_path_buf(), spec))
        })
        .collect()
}
/// Applies `f` to every file returned by `source_files` and gathers the values
/// it yields.
///
/// `Ok(Some(v))` contributes `v`, `Ok(None)` contributes nothing, and an
/// `Err` is reported on stderr together with the file path before moving on.
pub fn collect_analysis<T>(
    path: &Path,
    exclude_tests: bool,
    filter: &ExcludeFilter,
    f: impl Fn(&Path, &LanguageSpec) -> Result<Option<T>, Box<dyn std::error::Error>>,
) -> Vec<T> {
    source_files(path, exclude_tests, filter)
        .into_iter()
        .filter_map(|(source, spec)| match f(&source, spec) {
            Ok(outcome) => outcome,
            Err(err) => {
                eprintln!("warning: {}: {err}", source.display());
                None
            }
        })
        .collect()
}
/// Creates the directory walker used by this module.
///
/// Hidden entries are visited and symbolic links are not followed. An entry
/// is dropped when it is:
/// * a directory named `.git`, a directory excluded by `filter`, or (with
///   `exclude_tests`) a directory listed in `TEST_DIRS`;
/// * a file that `is_test_file` recognises (with `exclude_tests`) or that
///   `filter` excludes.
///
/// Entries of any other kind, and directories whose name is not valid UTF-8
/// (other than the `.git` check), are kept.
pub(crate) fn walk(path: &Path, exclude_tests: bool, filter: &ExcludeFilter) -> ignore::Walk {
    // The closure is stored inside the walker, so it owns its own copies.
    let rules = filter.clone();
    let root = path.to_path_buf();

    let mut builder = WalkBuilder::new(path);
    builder
        .hidden(false)
        .follow_links(false)
        .filter_entry(move |entry| {
            let Some(kind) = entry.file_type() else {
                return true;
            };
            if kind.is_dir() {
                if entry.file_name() == ".git" {
                    return false;
                }
                let Some(dir_name) = entry.file_name().to_str() else {
                    return true;
                };
                let is_test_dir = exclude_tests && TEST_DIRS.contains(&dir_name);
                return !(rules.excludes_dir(dir_name) || is_test_dir);
            }
            if kind.is_file() {
                let candidate = entry.path();
                let is_test = exclude_tests && is_test_file(candidate);
                return !(is_test || rules.excludes_file(candidate, &root));
            }
            true
        });
    builder.build()
}
/// Looks for a directory in the parent chain of `path` that would be excluded.
///
/// The normal components of `path.parent()` are checked from the outermost
/// inwards. The first one excluded by `filter` yields `"directory"`; the
/// first one listed in `TEST_DIRS` (only when `exclude_tests` is set) yields
/// `"test directory"`. Returns `None` when `path` has no parent or no
/// component matches. Components that are not valid UTF-8 are ignored.
fn ancestor_exclusion_reason(
    path: &Path,
    exclude_tests: bool,
    filter: &ExcludeFilter,
) -> Option<&'static str> {
    path.parent()?
        .components()
        .filter_map(|part| match part {
            std::path::Component::Normal(raw_name) => raw_name.to_str(),
            _ => None,
        })
        .find_map(|dir_name| {
            if filter.excludes_dir(dir_name) {
                Some("directory")
            } else if exclude_tests && TEST_DIRS.contains(&dir_name) {
                Some("test directory")
            } else {
                None
            }
        })
}
/// Prints every file under `path` that the given settings would exclude,
/// followed by the reason(s), and finally a total count.
///
/// Each line has the form `<relative path> (<reason>, ...)`, where the reasons
/// are `test file`, `extension`, `directory`, `test directory` and `glob`.
/// Walk errors are reported on stderr and skipped. The function currently
/// always returns `Ok(())`.
pub fn print_excluded_files(
    path: &Path,
    exclude_tests: bool,
    filter: &ExcludeFilter,
) -> Result<(), Box<dyn std::error::Error>> {
    // Walk with no rules so that files the filtered walk would drop are visited.
    let no_filter = ExcludeFilter::default();
    let mut count = 0;
    for entry in walk(path, false, &no_filter) {
        let entry = match entry {
            Ok(e) => e,
            Err(err) => {
                // Same reporting as `source_files`, instead of dropping the error.
                eprintln!("warning: {err}");
                continue;
            }
        };
        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
            continue;
        }
        let file_path = entry.path();
        let relative = file_path.strip_prefix(path).unwrap_or(file_path);

        let mut reasons = Vec::new();
        if exclude_tests && is_test_file(file_path) {
            reasons.push("test file");
        }
        if filter.excludes_by_extension(file_path) {
            reasons.push("extension");
        }
        // Only directories below the walk root can cause an exclusion: `walk`
        // tests the names of entries it visits, never the components of the
        // root path. Checking the full path would flag every file when the
        // root lives under e.g. `/home/me/test/`.
        if let Some(reason) = ancestor_exclusion_reason(relative, exclude_tests, filter) {
            reasons.push(reason);
        }
        if filter.excludes_by_glob(file_path, path) {
            reasons.push("glob");
        }
        if reasons.is_empty() {
            continue;
        }

        // When the root is itself a file the relative path is empty; show the
        // full path rather than a blank name.
        let shown = if relative.as_os_str().is_empty() {
            file_path
        } else {
            relative
        };
        println!("{} ({})", shown.display(), reasons.join(", "));
        count += 1;
    }
    println!("\n{count} file(s) excluded");
    Ok(())
}
// Unit tests for this module live in `walk_test.rs` and are compiled only in test builds.
#[cfg(test)]
#[path = "walk_test.rs"]
mod tests;