use std::path::Path;
use crate::WalkerFs;
use crate::glob::glob_match;
/// Largest gitignore content, in bytes, that `IgnoreFilter::from_gitignore`
/// accepts (1 MiB); anything longer is rejected with an error instead of
/// being parsed.
const MAX_GITIGNORE_SIZE: usize = 1_048_576;
/// One parsed line of a gitignore-style file.
#[derive(Debug, Clone)]
struct IgnoreRule {
    /// Glob text with the leading `!`, the trailing `/` and one leading `/`
    /// removed.
    pattern: String,
    /// `true` when the line started with `!`: a match re-includes the path.
    negated: bool,
    /// `true` when the line ended with `/`: the rule applies to directories only.
    dir_only: bool,
    /// `true` when the pattern contained a `/` other than the trailing one.
    /// Anchored rules are matched against the whole path only, never against
    /// the bare file name.
    anchored: bool,
}

impl IgnoreRule {
    /// Parses a single line into a rule.
    ///
    /// Surrounding whitespace is trimmed first. Returns `None` for blank lines
    /// and for comment lines (those starting with `#`).
    fn parse(line: &str) -> Option<Self> {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            return None;
        }

        // Peel the markers off as slices so the pattern is allocated only once.
        let (negated, rest) = match line.strip_prefix('!') {
            Some(rest) => (true, rest),
            None => (false, line),
        };
        let (dir_only, rest) = match rest.strip_suffix('/') {
            Some(rest) => (true, rest),
            None => (false, rest),
        };

        // Checked before the leading slash is dropped, so `/name` is anchored too.
        let anchored = rest.contains('/');
        let pattern = rest.strip_prefix('/').unwrap_or(rest).to_string();

        Some(IgnoreRule {
            pattern,
            negated,
            dir_only,
            anchored,
        })
    }

    /// Reports whether this rule's pattern applies to `path`.
    ///
    /// `is_dir` tells whether `path` is a directory; directory-only rules never
    /// match when it is `false`. Negation is not applied here: the caller
    /// decides what a match means.
    fn matches(&self, path: &Path, is_dir: bool) -> bool {
        if self.dir_only && !is_dir {
            return false;
        }

        let path_str = path.to_string_lossy();
        if self.glob_match_path(&path_str) {
            return true;
        }
        if self.anchored {
            return false;
        }

        // Unanchored rules may also match the last path component on its own.
        match path.file_name() {
            Some(name) => {
                let name_str = name.to_string_lossy();
                glob_match(&self.pattern, &name_str)
            }
            None => false,
        }
    }

    /// Matches the pattern against a full path string, routing patterns that
    /// contain `**` through [`Self::match_with_globstar`].
    fn glob_match_path(&self, path: &str) -> bool {
        if self.pattern.contains("**") {
            self.match_with_globstar(path)
        } else {
            glob_match(&self.pattern, path)
        }
    }

    /// Matches a pattern containing `**` against `path`.
    ///
    /// With exactly one `**`, the text before it is treated as a literal path
    /// prefix (not a glob) and the text after it is glob-matched against the
    /// tails of what remains:
    ///
    /// * When `**` is preceded by `/` (as in `dir/**`), the prefix must end on
    ///   a path-component boundary, so `dir/**` does not match `dirname/x`.
    /// * When `**` is followed by `/` (as in `**/name`), only tails that start
    ///   at a path-component boundary are tried, so `**/name` does not match
    ///   `a/othername`.
    /// * A `**` that is not slash-delimited on a side (e.g. `**.log`) keeps the
    ///   permissive behaviour on that side: any character offset is tried.
    ///
    /// Patterns with several `**` groups fall back to replacing each `**` with
    /// a single `*`; what that matches depends on `glob_match`.
    fn match_with_globstar(&self, path: &str) -> bool {
        let parts: Vec<&str> = self.pattern.split("**").collect();
        let (raw_prefix, raw_suffix) = match parts.as_slice() {
            [before, after] => (*before, *after),
            _ => return glob_match(&self.pattern.replace("**", "*"), path),
        };

        let prefix = raw_prefix.trim_end_matches('/');
        let suffix = raw_suffix.trim_start_matches('/');

        let remaining = if prefix.is_empty() {
            path
        } else {
            match path.strip_prefix(prefix) {
                Some(rest) => {
                    // `dir/**`: the prefix names whole directories, so the
                    // path must continue with `/` (or end) right after it.
                    let cuts_component = !rest.is_empty() && !rest.starts_with('/');
                    if raw_prefix.ends_with('/') && cuts_component {
                        return false;
                    }
                    rest.trim_start_matches('/')
                }
                None => return false,
            }
        };

        if suffix.is_empty() {
            return true;
        }

        // `**/name`: the suffix must line up with the start of a component.
        let component_aligned = raw_suffix.starts_with('/');
        let is_candidate = |start: usize| {
            !component_aligned || start == 0 || remaining[..start].ends_with('/')
        };

        remaining
            .char_indices()
            .map(|(start, _)| start)
            .filter(|&start| is_candidate(start))
            .any(|start| glob_match(suffix, &remaining[start..]))
            // Covers an empty `remaining`, for which the scan above tries nothing.
            || glob_match(suffix, remaining)
    }
}
/// An ordered list of ignore rules that is evaluated as a whole.
///
/// Rules are consulted in insertion order and the last one that matches a
/// path decides whether that path is ignored.
#[derive(Debug, Clone, Default)]
pub struct IgnoreFilter {
    rules: Vec<IgnoreRule>,
}

impl IgnoreFilter {
    /// Creates a filter with no rules; it ignores nothing.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a filter pre-populated with a fixed set of commonly ignored
    /// directory and file names (`.git`, `node_modules`, `target`, ...).
    pub fn with_defaults() -> Self {
        const DEFAULT_PATTERNS: [&str; 9] = [
            ".git",
            "node_modules",
            "target",
            "__pycache__",
            ".venv",
            "venv",
            "dist",
            "build",
            ".next",
        ];

        let mut filter = Self::new();
        DEFAULT_PATTERNS
            .iter()
            .for_each(|pattern| filter.add_rule(pattern));
        filter
    }

    /// Builds a filter from the gitignore file at `path`, read through `fs`.
    ///
    /// Bytes that are not valid UTF-8 are replaced lossily before parsing.
    /// Blank lines and comment lines produce no rule.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `fs.read_file`, and returns
    /// `WalkerError::Io` when the content is longer than
    /// `MAX_GITIGNORE_SIZE` bytes.
    pub async fn from_gitignore(
        path: &Path,
        fs: &impl WalkerFs,
    ) -> Result<Self, crate::WalkerError> {
        let bytes = fs.read_file(path).await?;
        let size = bytes.len();
        if size > MAX_GITIGNORE_SIZE {
            let message = format!(
                "{}: gitignore too large ({} bytes, max {})",
                path.display(),
                size,
                MAX_GITIGNORE_SIZE,
            );
            return Err(crate::WalkerError::Io(message));
        }

        let text = String::from_utf8_lossy(&bytes);
        let rules = text.lines().filter_map(IgnoreRule::parse).collect();
        Ok(Self { rules })
    }

    /// Appends the rule described by `pattern`; does nothing when `pattern`
    /// is blank or a comment.
    pub fn add_rule(&mut self, pattern: &str) {
        // `Option` iterates over zero or one item, so this pushes at most once.
        self.rules.extend(IgnoreRule::parse(pattern));
    }

    /// Returns whether `path` is ignored.
    ///
    /// Every rule is evaluated in order; each matching rule overrides the
    /// verdict so far, with negated rules un-ignoring the path. With no
    /// matching rule the path is not ignored.
    pub fn is_ignored(&self, path: &Path, is_dir: bool) -> bool {
        self.rules.iter().fold(false, |verdict, rule| {
            if rule.matches(path, is_dir) {
                !rule.negated
            } else {
                verdict
            }
        })
    }

    /// Same as [`Self::is_ignored`], for a name given as a string.
    pub fn is_name_ignored(&self, name: &str, is_dir: bool) -> bool {
        let as_path = Path::new(name);
        self.is_ignored(as_path, is_dir)
    }

    /// Appends copies of all of `other`'s rules after this filter's own, so
    /// `other`'s rules take precedence where both match.
    pub fn merge(&mut self, other: &IgnoreFilter) {
        self.rules.extend_from_slice(&other.rules);
    }

    /// Returns a new filter holding this filter's rules followed by
    /// `other`'s; neither input is modified.
    pub fn merged_with(&self, other: &IgnoreFilter) -> IgnoreFilter {
        let mut rules = Vec::with_capacity(self.rules.len() + other.rules.len());
        rules.extend_from_slice(&self.rules);
        rules.extend_from_slice(&other.rules);
        IgnoreFilter { rules }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a filter by adding `patterns` in order.
    fn filter_of(patterns: &[&str]) -> IgnoreFilter {
        let mut filter = IgnoreFilter::new();
        for pattern in patterns {
            filter.add_rule(pattern);
        }
        filter
    }

    /// Checks `is_ignored` against every `(path, is_dir, expected)` row.
    fn check(filter: &IgnoreFilter, cases: &[(&str, bool, bool)]) {
        for &(path, is_dir, expected) in cases {
            assert_eq!(
                filter.is_ignored(Path::new(path), is_dir),
                expected,
                "path {path:?} (is_dir = {is_dir})"
            );
        }
    }

    #[test]
    fn test_simple_patterns() {
        let filter = filter_of(&["*.log", "temp/"]);
        check(
            &filter,
            &[
                ("app.log", false, true),
                ("debug.log", false, true),
                ("app.txt", false, false),
                ("temp", true, true),
                ("temp", false, false),
            ],
        );
    }

    #[test]
    fn test_negation() {
        let filter = filter_of(&["*.log", "!important.log"]);
        check(
            &filter,
            &[
                ("debug.log", false, true),
                ("important.log", false, false),
            ],
        );
    }

    #[test]
    fn test_anchored_patterns() {
        let filter = filter_of(&["/root.txt", "anywhere.txt"]);
        check(
            &filter,
            &[
                ("root.txt", false, true),
                ("sub/root.txt", false, false),
                ("anywhere.txt", false, true),
                ("sub/anywhere.txt", false, true),
            ],
        );
    }

    #[test]
    fn test_directory_patterns() {
        let filter = filter_of(&["build/"]);
        check(&filter, &[("build", true, true), ("build", false, false)]);
    }

    #[test]
    fn test_globstar() {
        let filter = filter_of(&["**/*.log"]);
        check(
            &filter,
            &[
                ("app.log", false, true),
                ("logs/app.log", false, true),
                ("var/logs/app.log", false, true),
            ],
        );
    }

    #[test]
    fn test_defaults() {
        let filter = IgnoreFilter::with_defaults();
        check(
            &filter,
            &[
                (".git", true, true),
                ("node_modules", true, true),
                ("target", true, true),
                ("__pycache__", true, true),
            ],
        );
    }

    #[test]
    fn test_comments_and_empty() {
        // Only the last entry is a real pattern; the rest must be skipped.
        let filter = filter_of(&["# comment", "", " ", "valid.txt"]);
        assert_eq!(filter.rules.len(), 1);
        check(&filter, &[("valid.txt", false, true)]);
    }

    #[test]
    fn test_path_patterns() {
        let filter = filter_of(&["logs/*.log"]);
        check(
            &filter,
            &[
                ("logs/app.log", false, true),
                ("other/app.log", false, false),
                ("app.log", false, false),
            ],
        );
    }

    mod async_tests {
        use super::*;
        use crate::{WalkerDirEntry, WalkerError, WalkerFs};
        use std::collections::HashMap;
        use std::path::PathBuf;

        /// Location of the gitignore file inside the in-memory file system.
        const GITIGNORE: &str = "/.gitignore";

        /// Placeholder entry type; `list_dir` below never produces one.
        struct MemEntry;

        impl WalkerDirEntry for MemEntry {
            fn name(&self) -> &str {
                ""
            }

            fn is_dir(&self) -> bool {
                false
            }

            fn is_file(&self) -> bool {
                true
            }

            fn is_symlink(&self) -> bool {
                false
            }
        }

        /// In-memory file system backed by a path-to-bytes map.
        struct SingleFileFs(HashMap<PathBuf, Vec<u8>>);

        #[async_trait::async_trait]
        impl WalkerFs for SingleFileFs {
            type DirEntry = MemEntry;

            async fn list_dir(&self, _: &Path) -> Result<Vec<MemEntry>, WalkerError> {
                Ok(Vec::new())
            }

            async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
                match self.0.get(path) {
                    Some(bytes) => Ok(bytes.clone()),
                    None => Err(WalkerError::NotFound(path.display().to_string())),
                }
            }

            async fn is_dir(&self, _: &Path) -> bool {
                false
            }

            async fn exists(&self, path: &Path) -> bool {
                self.0.contains_key(path)
            }
        }

        /// File system whose only file is the gitignore with `content`.
        fn fs_with_gitignore(content: Vec<u8>) -> SingleFileFs {
            SingleFileFs(HashMap::from([(PathBuf::from(GITIGNORE), content)]))
        }

        #[tokio::test]
        async fn test_oversized_gitignore_rejected() {
            // One byte over the limit.
            let fs = fs_with_gitignore(vec![b'#'; super::MAX_GITIGNORE_SIZE + 1]);

            let outcome = IgnoreFilter::from_gitignore(Path::new(GITIGNORE), &fs).await;

            assert!(outcome.is_err());
            let err = outcome.unwrap_err().to_string();
            assert!(err.contains("too large"), "expected 'too large' in: {err}");
        }

        #[tokio::test]
        async fn test_normal_gitignore_accepted() {
            let fs = fs_with_gitignore(b"*.log\n# comment\ntarget/\n".to_vec());

            let filter = IgnoreFilter::from_gitignore(Path::new(GITIGNORE), &fs)
                .await
                .unwrap();

            assert!(filter.is_ignored(Path::new("app.log"), false));
            assert!(filter.is_ignored(Path::new("target"), true));
        }
    }
}