use std::path::Path;
use anyhow::Result;
use globset::{Glob, GlobSetBuilder};
/// Borrowed filter settings consumed by `apply_file_filters`.
///
/// Every field is optional in effect: an empty slice, `None`, or an empty
/// string leaves the corresponding filter stage disabled.
#[derive(Debug)]
pub struct FilterParams<'a> {
/// Glob patterns to keep. A file survives when it matches at least one
/// non-empty pattern; empty strings in the list are ignored.
pub glob: &'a [String],
/// Glob patterns to drop. A file is removed when it matches any non-empty
/// pattern; empty strings in the list are ignored.
pub exclude: &'a [String],
/// Path prefix to restrict the file list to. Files under it are kept with
/// the prefix rewritten to `./`; all other files are dropped.
pub root: Option<&'a str>,
/// File-type names checked against on-disk metadata. Recognised values are
/// `executable`, `not executable`, `symlink`, `not symlink`, `text` and
/// `binary`; a file is kept when it satisfies any one of the listed types.
pub file_types: &'a [String],
}
/// Runs every configured filter over `files` and returns the surviving paths.
///
/// Stages run in a fixed order: glob inclusion, glob exclusion, root
/// restriction, then file-type checks. Each stage receives the output of the
/// previous one, so the file-type stage sees paths as rewritten by the root
/// stage.
///
/// # Errors
///
/// Returns the first error raised by any stage (for example an invalid glob
/// pattern).
pub fn apply_file_filters(files: &[String], params: FilterParams) -> Result<Vec<String>> {
    // Nothing to filter: skip the logging and the pipeline entirely.
    if files.is_empty() {
        return Ok(Vec::new());
    }

    let initial_count = files.len();
    tracing::trace!("Applying filters to {} files", initial_count);
    tracing::trace!("Glob patterns: {:?}", params.glob);
    tracing::trace!("Exclude patterns: {:?}", params.exclude);
    tracing::trace!("File types: {:?}", params.file_types);

    let survivors = filter_by_glob(files, params.glob)?;
    let survivors = filter_by_exclude(&survivors, params.exclude)?;
    let survivors = filter_by_root(&survivors, params.root)?;
    let survivors = filter_by_file_types(&survivors, params.file_types)?;

    tracing::trace!(
        "Filtered from {} to {} files",
        initial_count,
        survivors.len()
    );
    Ok(survivors)
}
/// Keeps only the files that match at least one of `patterns`.
///
/// Empty pattern strings are skipped. When no usable pattern remains the
/// input is returned unchanged.
///
/// # Errors
///
/// Fails if a non-empty pattern is not a valid glob or the set cannot be built.
fn filter_by_glob(files: &[String], patterns: &[String]) -> Result<Vec<String>> {
    // Peek before building so that "no patterns" and "only blank patterns"
    // share the same pass-through path.
    let mut usable = patterns.iter().filter(|p| !p.is_empty()).peekable();
    if usable.peek().is_none() {
        return Ok(files.to_vec());
    }

    let mut set_builder = GlobSetBuilder::new();
    for pattern in usable {
        set_builder.add(Glob::new(pattern)?);
    }
    let matcher = set_builder.build()?;

    let mut kept = Vec::new();
    for file in files {
        if matcher.is_match(file) {
            kept.push(file.clone());
        }
    }

    tracing::trace!("Glob filter: {} -> {} files", files.len(), kept.len());
    Ok(kept)
}
/// Drops every file that matches at least one of `patterns`.
///
/// Empty pattern strings are skipped. When no usable pattern remains the
/// input is returned unchanged.
///
/// # Errors
///
/// Fails if a non-empty pattern is not a valid glob or the set cannot be built.
fn filter_by_exclude(files: &[String], patterns: &[String]) -> Result<Vec<String>> {
    let usable: Vec<&str> = patterns
        .iter()
        .map(String::as_str)
        .filter(|p| !p.is_empty())
        .collect();
    if usable.is_empty() {
        return Ok(files.to_vec());
    }

    let mut set_builder = GlobSetBuilder::new();
    for pattern in usable {
        set_builder.add(Glob::new(pattern)?);
    }
    let rejected = set_builder.build()?;

    // Start from a full copy and remove whatever the exclusion set matches.
    let mut kept = files.to_vec();
    kept.retain(|file| !rejected.is_match(file));

    tracing::trace!("Exclude filter: {} -> {} files", files.len(), kept.len());
    Ok(kept)
}
/// Keeps only the files located under `root` and rewrites them relative to it.
///
/// Each surviving path has the root prefix replaced by `./`. A `None` or
/// empty root disables the filter and returns the input unchanged.
///
/// The root is matched on a directory boundary: `"src"` and `"src/"` both
/// select `src/main.rs` (yielding `./main.rs`), while a sibling such as
/// `src2/main.rs` is rejected. A path equal to the root itself maps to `./`.
///
/// # Errors
///
/// Never fails at present; the `Result` mirrors the other filter stages.
fn filter_by_root(files: &[String], root: Option<&str>) -> Result<Vec<String>> {
    let root_prefix = match root {
        Some(prefix) if !prefix.is_empty() => prefix,
        _ => return Ok(files.to_vec()),
    };
    // A root that already ends in `/` stops on a directory boundary by itself.
    let ends_with_separator = root_prefix.ends_with('/');

    let filtered: Vec<String> = files
        .iter()
        .filter_map(|file| {
            let rest = file.strip_prefix(root_prefix)?;
            let relative = if ends_with_separator || rest.is_empty() {
                rest
            } else {
                // Without a trailing slash on the root, the remainder must
                // begin with a separator; otherwise the root only matched a
                // longer sibling name (e.g. root `src` vs. file `src2/a.rs`).
                // Stripping it here also avoids emitting `.//…`.
                rest.strip_prefix('/')?
            };
            Some(format!("./{relative}"))
        })
        .collect();

    tracing::trace!(
        "Root filter ({}): {} -> {} files",
        root_prefix,
        files.len(),
        filtered.len()
    );
    Ok(filtered)
}
/// Keeps the files that satisfy at least one of the requested `types`.
///
/// An empty `types` slice disables the filter and returns the input unchanged.
///
/// # Errors
///
/// Stops at, and returns, the first error reported while classifying a file.
fn filter_by_file_types(files: &[String], types: &[String]) -> Result<Vec<String>> {
    if types.is_empty() {
        return Ok(files.to_vec());
    }

    // Collecting into `Result` short-circuits on the first classification error.
    let kept = files
        .iter()
        .filter_map(|file| match matches_file_types(file, types) {
            Ok(true) => Some(Ok(file.clone())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        })
        .collect::<Result<Vec<String>>>()?;

    tracing::trace!(
        "File type filter: {} -> {} files",
        files.len(),
        kept.len()
    );
    Ok(kept)
}
/// Reports whether `file` satisfies at least one entry of `types`.
///
/// Metadata is read without following symlinks. A file whose metadata cannot
/// be read is logged and treated as not matching. Unknown type names are
/// logged and otherwise ignored.
///
/// # Errors
///
/// Fails if a `text`/`binary` check cannot open or read the file.
fn matches_file_types(file: &str, types: &[String]) -> Result<bool> {
    use std::os::unix::fs::PermissionsExt;

    let path = Path::new(file);
    let Ok(metadata) = std::fs::symlink_metadata(path) else {
        tracing::warn!("Cannot access file metadata for: {}", file);
        return Ok(false);
    };

    let symlink = metadata.file_type().is_symlink();
    // Symlinks never count as executable, whatever their mode bits say.
    let runnable = !symlink && (metadata.permissions().mode() & 0o111) != 0;
    // Content sniffing is only attempted on regular, non-symlink files.
    let regular = !symlink && metadata.is_file();

    for requested in types {
        let satisfied = match requested.as_str() {
            "executable" => runnable,
            "not executable" => !runnable,
            "symlink" => symlink,
            "not symlink" => !symlink,
            "text" => regular && is_text_file(path)?,
            "binary" => regular && !is_text_file(path)?,
            _ => {
                tracing::warn!("Unknown file type filter: {}", requested);
                false
            }
        };
        if satisfied {
            return Ok(true);
        }
    }
    Ok(false)
}
/// Guesses whether the file at `path` is text by sniffing its leading bytes.
///
/// Performs a single read of up to 1024 bytes and reports `true` when that
/// chunk contains no NUL byte. An empty read therefore counts as text.
///
/// # Errors
///
/// Fails if the file cannot be opened or read.
fn is_text_file(path: &Path) -> Result<bool> {
    use std::io::Read;

    let mut head = [0u8; 1024];
    let mut handle = std::fs::File::open(path)?;
    let filled = handle.read(&mut head)?;

    // An empty slice trivially contains no NUL, so empty files fall out as text.
    let sniffed = &head[..filled];
    Ok(!sniffed.contains(&0))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned form the filters expect.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Runs the full pipeline with no file-type filter and unwraps the result.
    fn run(files: &[String], glob: &[&str], exclude: &[&str], root: Option<&str>) -> Vec<String> {
        let glob = owned(glob);
        let exclude = owned(exclude);
        let params = FilterParams {
            glob: &glob,
            exclude: &exclude,
            root,
            file_types: &[],
        };
        apply_file_filters(files, params).unwrap()
    }

    #[test]
    fn test_empty_files_returns_empty() {
        let result = run(&[], &[], &[], None);
        assert!(result.is_empty());
    }

    #[test]
    fn test_no_filters_returns_all_files() {
        let files = owned(&["test.rs", "test.js"]);
        let result = run(&files, &[], &[], None);
        assert_eq!(result, files);
    }

    #[test]
    fn test_glob_filtering() {
        let files = owned(&["test.rs", "test.js", "main.rs", "README.md"]);
        let result = run(&files, &["*.rs"], &[], None);
        assert_eq!(result, vec!["test.rs", "main.rs"]);
    }

    #[test]
    fn test_exclude_filtering() {
        let files = owned(&["test.rs", "test.js", "README.md"]);
        let result = run(&files, &[], &["README.md"], None);
        assert_eq!(result, vec!["test.rs", "test.js"]);
    }

    #[test]
    fn test_root_filtering() {
        let files = owned(&["src/main.rs", "tests/test.rs", "other/file.js"]);
        let result = run(&files, &[], &[], Some("src/"));
        assert_eq!(result, vec!["./main.rs"]);
    }

    #[test]
    fn test_combined_filtering() {
        let files = owned(&[
            "src/main.rs",
            "src/lib.rs",
            "src/test.js",
            "tests/test.rs",
            "README.md",
        ]);
        let result = run(&files, &["*.rs"], &["README.md"], Some("src/"));
        assert_eq!(result, vec!["./main.rs", "./lib.rs"]);
    }

    #[test]
    fn test_multiple_glob_patterns() {
        let files = owned(&["test.rs", "test.js", "test.py", "README.md"]);
        let result = run(&files, &["*.rs", "*.js"], &[], None);
        assert_eq!(result, vec!["test.rs", "test.js"]);
    }

    #[test]
    fn test_empty_patterns_ignored() {
        let files = owned(&["test.rs"]);
        let result = run(&files, &["", "*.rs"], &[""], None);
        assert_eq!(result, vec!["test.rs"]);
    }
}