use std::path::{Path, PathBuf};
use globset::{Glob, GlobSet, GlobSetBuilder};
use crate::defaults::DEFAULT_EXCLUDES;
/// Path filter built from compiled include and exclude glob sets.
///
/// A path is rejected when it matches `exclude_set`; otherwise it is accepted
/// when `include_set` is `None` or matches the path.
pub struct FileFilter {
/// Compiled include globs; `None` when no include patterns were supplied,
/// in which case no include restriction is applied.
include_set: Option<GlobSet>,
/// Compiled exclude globs: the entries of `DEFAULT_EXCLUDES` plus the
/// caller-supplied exclude patterns.
exclude_set: GlobSet,
}
impl FileFilter {
pub fn new(include_patterns: &[String], exclude_patterns: &[String]) -> anyhow::Result<Self> {
let include_set = if include_patterns.is_empty() {
None
} else {
let set = include_patterns
.iter()
.try_fold(GlobSetBuilder::new(), |mut b, p| {
b.add(
Glob::new(p)
.map_err(|e| anyhow::anyhow!("invalid glob pattern '{p}': {e}"))?,
);
Ok::<_, anyhow::Error>(b)
})?
.build()
.map_err(|e| anyhow::anyhow!("failed to build glob set: {e}"))?;
Some(set)
};
let exclude_set = DEFAULT_EXCLUDES
.iter()
.map(|p| Glob::new(p).unwrap())
.chain(
exclude_patterns
.iter()
.map(|p| {
Glob::new(p).map_err(|e| anyhow::anyhow!("invalid glob pattern '{p}': {e}"))
})
.collect::<anyhow::Result<Vec<_>>>()?
.into_iter(),
)
.fold(GlobSetBuilder::new(), |mut b, g| {
b.add(g);
b
})
.build()
.map_err(|e| anyhow::anyhow!("failed to build glob set: {e}"))?;
Ok(Self {
include_set,
exclude_set,
})
}
pub fn should_include(&self, path: &Path) -> bool {
if self.exclude_set.is_match(path) {
return false;
}
self.include_set
.as_ref()
.is_none_or(|set| set.is_match(path))
}
pub fn filter_paths(&self, paths: Vec<PathBuf>) -> impl Iterator<Item = PathBuf> + '_ {
paths.into_iter().filter(|p| self.should_include(p))
}
}
/// Heuristically reports whether `content` looks like binary data.
///
/// Only the first 8000 bytes are inspected; the content is treated as binary
/// when any of them is a NUL (`0x00`) byte. Empty input is never binary.
pub fn is_binary(content: &[u8]) -> bool {
    const SNIFF_LEN: usize = 8000;
    // Clamp the window so short inputs are sliced without panicking.
    let head = &content[..content.len().min(SNIFF_LEN)];
    head.contains(&0)
}
/// Heuristically reports whether `content` looks minified.
///
/// Examines at most the first 5 lines and returns `true` as soon as one of
/// them is longer than 500 bytes (`str::len` counts bytes, not characters).
pub fn is_minified(content: &str) -> bool {
    const LINES_TO_CHECK: usize = 5;
    const MAX_LINE_BYTES: usize = 500;

    for line in content.lines().take(LINES_TO_CHECK) {
        if line.len() > MAX_LINE_BYTES {
            return true;
        }
    }
    false
}
// Unit tests for `FileFilter`, `is_binary` and `is_minified`.
#[cfg(test)]
mod tests {
use super::*;
// With no user patterns, the built-in default excludes are expected to reject
// lock files, dependency and build directories, images, VCS metadata and
// minified bundles.
#[test]
fn default_excludes_applied() {
let filter = FileFilter::new(&[], &[]).unwrap();
assert!(!filter.should_include(Path::new("Cargo.lock")));
assert!(!filter.should_include(Path::new("node_modules/foo.js")));
assert!(!filter.should_include(Path::new("image.png")));
assert!(!filter.should_include(Path::new("target/debug/binary")));
assert!(!filter.should_include(Path::new(".git/HEAD")));
assert!(!filter.should_include(Path::new("bundle.min.js")));
}
// A user exclude pattern rejects matching paths, including one inside a
// subdirectory, and leaves other files alone.
#[test]
fn custom_exclude() {
let filter = FileFilter::new(&[], &["*.md".to_string()]).unwrap();
assert!(!filter.should_include(Path::new("README.md")));
assert!(!filter.should_include(Path::new("docs/GUIDE.md")));
assert!(filter.should_include(Path::new("main.rs")));
}
// With an include pattern, only matching paths are accepted; `*.rs` is
// expected to match a nested path as well.
#[test]
fn include_only() {
let filter = FileFilter::new(&["*.rs".to_string()], &[]).unwrap();
assert!(filter.should_include(Path::new("main.rs")));
assert!(filter.should_include(Path::new("src/lib.rs")));
assert!(!filter.should_include(Path::new("README.md")));
assert!(!filter.should_include(Path::new("Cargo.toml")));
}
// A path matching both the include and the exclude pattern is rejected.
#[test]
fn include_and_exclude_interaction() {
let filter = FileFilter::new(&["*.rs".to_string()], &["test_*.rs".to_string()]).unwrap();
assert!(filter.should_include(Path::new("main.rs")));
assert!(!filter.should_include(Path::new("test_helper.rs")));
}
// Ordinary source and config files pass a filter built with no patterns.
#[test]
fn empty_filter_includes_normal_files() {
let filter = FileFilter::new(&[], &[]).unwrap();
assert!(filter.should_include(Path::new("src/main.rs")));
assert!(filter.should_include(Path::new("Cargo.toml")));
assert!(filter.should_include(Path::new("README.md")));
}
// Matching any one of several include patterns is sufficient.
#[test]
fn multiple_include_patterns() {
let filter = FileFilter::new(&["*.rs".to_string(), "*.toml".to_string()], &[]).unwrap();
assert!(filter.should_include(Path::new("main.rs")));
assert!(filter.should_include(Path::new("Cargo.toml")));
assert!(!filter.should_include(Path::new("README.md")));
}
// Matching any one of several exclude patterns is enough to reject.
#[test]
fn multiple_exclude_patterns() {
let filter = FileFilter::new(&[], &["*.md".to_string(), "*.txt".to_string()]).unwrap();
assert!(!filter.should_include(Path::new("README.md")));
assert!(!filter.should_include(Path::new("notes.txt")));
assert!(filter.should_include(Path::new("main.rs")));
}
// An exact-name exclude overrides a broader include pattern.
#[test]
fn exclude_takes_precedence_over_include() {
let filter = FileFilter::new(&["*.rs".to_string()], &["main.rs".to_string()]).unwrap();
assert!(!filter.should_include(Path::new("main.rs")));
assert!(filter.should_include(Path::new("lib.rs")));
}
// `filter_paths` keeps only accepted paths and preserves their input order.
#[test]
fn filter_paths_works() {
let filter = FileFilter::new(&["*.rs".to_string()], &[]).unwrap();
let paths = vec![
PathBuf::from("main.rs"),
PathBuf::from("README.md"),
PathBuf::from("lib.rs"),
];
let filtered: Vec<_> = filter.filter_paths(paths).collect();
assert_eq!(
filtered,
vec![PathBuf::from("main.rs"), PathBuf::from("lib.rs")]
);
}
// Filtering an empty list yields nothing.
#[test]
fn filter_paths_empty_input() {
let filter = FileFilter::new(&[], &[]).unwrap();
let filtered: Vec<_> = filter.filter_paths(vec![]).collect();
assert!(filtered.is_empty());
}
// A NUL byte anywhere in a short buffer marks it as binary.
#[test]
fn is_binary_with_null_bytes() {
let content = b"hello\x00world";
assert!(is_binary(content));
}
// Plain ASCII source text is not binary.
#[test]
fn is_binary_with_text() {
let content = b"fn main() { println!(\"hello\"); }";
assert!(!is_binary(content));
}
// Empty content is not binary.
#[test]
fn is_binary_with_empty() {
assert!(!is_binary(b""));
}
// Multi-byte UTF-8 text contains no NUL bytes and is not binary.
#[test]
fn is_binary_with_utf8() {
assert!(!is_binary("こんにちは世界".as_bytes()));
}
// A single line longer than 500 bytes counts as minified.
#[test]
fn is_minified_with_long_line() {
let long_line = "a".repeat(501);
assert!(is_minified(&long_line));
}
// Short, ordinary lines are not minified.
#[test]
fn is_minified_with_normal_content() {
assert!(!is_minified("fn main() {\n println!(\"hi\");\n}\n"));
}
// Only the first five lines are examined, so a long sixth line is ignored.
#[test]
fn is_minified_long_line_after_fifth() {
let mut content = "short\n".repeat(5);
content.push_str(&"a".repeat(501));
assert!(!is_minified(&content));
}
// The threshold is strict: a line of exactly 500 bytes is not minified.
#[test]
fn is_minified_exactly_500_chars() {
let line = "a".repeat(500);
assert!(!is_minified(&line));
}
// Empty content has no lines and is not minified.
#[test]
fn is_minified_empty() {
assert!(!is_minified(""));
}
// A long line anywhere within the first five lines is detected.
#[test]
fn is_minified_long_line_on_line_3() {
let content = format!("short\nshort\n{}\nshort\nshort\n", "a".repeat(501));
assert!(is_minified(&content));
}
// A malformed include glob (unclosed character class) yields an error.
#[test]
fn invalid_include_glob_returns_error() {
let result = FileFilter::new(&["[invalid".to_string()], &[]);
assert!(result.is_err());
}
// A malformed exclude glob (unclosed character class) yields an error.
#[test]
fn invalid_exclude_glob_returns_error() {
let result = FileFilter::new(&[], &["[invalid".to_string()]);
assert!(result.is_err());
}
}