use anyhow::Result;
use ignore::gitignore::Gitignore;
use std::path::{Path, PathBuf};
use crate::diagnostics::{SkipReason, WatchDiagnostic};
use crate::ingest::detect_language;
/// Directory names that are always skipped, independent of any `.gitignore`
/// or user-supplied glob.
///
/// A file is treated as internal when any directory component of its path
/// below the filter root is exactly equal to one of these names.
const INTERNAL_IGNORE_DIRS: &[&str] = &[
".git",
".magellan",
"target",
"node_modules",
".venv",
"venv",
"__pycache__",
];
/// File-name suffixes (leading dot included) that are always skipped.
///
/// Entries are compared with `ends_with` against the file name, so each one
/// must keep its leading dot. The names suggest database files and their
/// journal / WAL / shared-memory companions.
const INTERNAL_IGNORE_EXTS: &[&str] = &[
".db",
".db-journal",
".db-wal",
".db-shm",
".sqlite",
".sqlite3",
];
/// Decides which files under a root directory should be processed and which
/// should be skipped, and reports why a file was skipped.
pub struct FileFilter {
// Root directory the filter is anchored at; canonicalized by `new` when
// canonicalization succeeds, otherwise stored as given.
root: PathBuf,
// Matcher built from the root's `.gitignore` and `.ignore` files.
gitignore: Option<Gitignore>,
// When non-empty, a file must match at least one of these globs to be kept.
include_patterns: Vec<globset::GlobMatcher>,
// A file matching any of these globs is skipped.
exclude_patterns: Vec<globset::GlobMatcher>,
}
impl FileFilter {
    /// Builds a filter anchored at `root`.
    ///
    /// `root` is canonicalized when possible so that later prefix comparisons
    /// work on a stable spelling of the directory; if canonicalization fails
    /// the path is stored as given.
    ///
    /// * `include_patterns` - globs matched against root-relative paths; when
    ///   non-empty, only files matching at least one of them are kept.
    /// * `exclude_patterns` - globs matched against root-relative paths; files
    ///   matching any of them are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error when the ignore matcher cannot be built or when any
    /// glob pattern is invalid.
    pub fn new(
        root: &Path,
        include_patterns: &[String],
        exclude_patterns: &[String],
    ) -> Result<Self> {
        let root = std::fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
        let gitignore = Self::load_gitignore(&root)?;
        // An empty pattern list compiles to an empty matcher list, so no
        // special case is needed for "no include patterns".
        let include_patterns = Self::compile_globs(&root, include_patterns)?;
        let exclude_patterns = Self::compile_globs(&root, exclude_patterns)?;
        Ok(Self {
            root,
            gitignore,
            include_patterns,
            exclude_patterns,
        })
    }

    /// Builds an ignore matcher from `<root>/.gitignore` and `<root>/.ignore`.
    ///
    /// A file that exists but cannot be (fully) loaded only produces a warning
    /// on stderr; the remaining rules are still used.
    fn load_gitignore(root: &Path) -> Result<Option<Gitignore>> {
        let mut builder = ignore::gitignore::GitignoreBuilder::new(root);
        for name in [".gitignore", ".ignore"] {
            let candidate = root.join(name);
            if candidate.exists() {
                if let Some(err) = builder.add(&candidate) {
                    eprintln!("Warning: Failed to load {}: {}", name, err);
                }
            }
        }
        Ok(Some(builder.build()?))
    }

    /// Compiles every pattern into a glob matcher, failing on the first
    /// invalid pattern. `_root` is currently unused.
    fn compile_globs(_root: &Path, patterns: &[String]) -> Result<Vec<globset::GlobMatcher>> {
        patterns
            .iter()
            .map(|pattern| {
                globset::Glob::new(pattern)
                    .map(|glob| glob.compile_matcher())
                    .map_err(|e| anyhow::anyhow!("Invalid glob pattern '{}': {}", pattern, e))
            })
            .collect()
    }

    /// Returns the reason `path` should be skipped, or `None` if it should be
    /// processed.
    ///
    /// Checks run in this order, and the first one that applies wins:
    /// not a regular file, internal ignore list, gitignore rules, unsupported
    /// language, include globs, exclude globs.
    pub fn should_skip(&self, path: &Path) -> Option<SkipReason> {
        if !path.is_file() {
            return Some(SkipReason::NotAFile);
        }

        // Resolve the root-relative form once so that every later check sees
        // the same path, however the caller spelled it.
        let rel = self.relative_to_root(path);

        if Self::is_internal_ignore(path, &rel) {
            return Some(SkipReason::IgnoredInternal);
        }

        if let Some(gitignore) = &self.gitignore {
            if Self::is_gitignored(gitignore, &rel) {
                return Some(SkipReason::IgnoredByGitignore);
            }
        }

        if detect_language(path).is_none() {
            return Some(SkipReason::UnsupportedLanguage);
        }

        if !self.include_patterns.is_empty() || !self.exclude_patterns.is_empty() {
            // Globs are written with forward slashes.
            let rel_str = rel.to_string_lossy().replace('\\', "/");
            let included = self.include_patterns.is_empty()
                || self.include_patterns.iter().any(|m| m.is_match(&rel_str));
            let excluded = self.exclude_patterns.iter().any(|m| m.is_match(&rel_str));
            if !included || excluded {
                return Some(SkipReason::ExcludedByGlob);
            }
        }

        None
    }

    /// Expresses `path` relative to the filter root when possible.
    ///
    /// The stored root is canonical, so a relative path or an absolute path
    /// that goes through a symlink does not share a literal prefix with it.
    /// In that case the path itself is canonicalized and the prefix test is
    /// retried. If the path still is not under the root it is returned
    /// unchanged.
    fn relative_to_root<'a>(&self, path: &'a Path) -> std::borrow::Cow<'a, Path> {
        if let Ok(rel) = path.strip_prefix(&self.root) {
            return std::borrow::Cow::Borrowed(rel);
        }
        if let Ok(canonical) = std::fs::canonicalize(path) {
            if let Ok(rel) = canonical.strip_prefix(&self.root) {
                return std::borrow::Cow::Owned(rel.to_path_buf());
            }
        }
        std::borrow::Cow::Borrowed(path)
    }

    /// Returns `true` when the file itself, or any directory between it and
    /// the root, is ignored by `gitignore`.
    fn is_gitignored(gitignore: &Gitignore, rel: &Path) -> bool {
        // `should_skip` only gets here for regular files, so `is_dir` is false.
        if gitignore.matched(rel, false).is_ignore() {
            return true;
        }
        // Walk the parent directories. The walk stops before the empty path,
        // which stands for the root itself: the root cannot be ignored by its
        // own rules, and a catch-all pattern such as `*` would match it.
        rel.ancestors()
            .skip(1)
            .take_while(|dir| !dir.as_os_str().is_empty())
            .any(|dir| gitignore.matched(dir, true).is_ignore())
    }

    /// Returns `true` when the file has an internally ignored suffix or lives
    /// inside an internally ignored directory.
    ///
    /// `path` is the path as given by the caller; `rel` is its root-relative
    /// form as produced by `relative_to_root`.
    fn is_internal_ignore(path: &Path, rel: &Path) -> bool {
        let has_ignored_suffix = path.file_name().map_or(false, |name| {
            let name = name.to_string_lossy();
            INTERNAL_IGNORE_EXTS.iter().any(|ext| name.ends_with(*ext))
        });
        if has_ignored_suffix {
            return true;
        }

        // A rooted `rel` means the file is outside the filter root; its
        // components then include directories above the project, which must
        // not trigger the check (the project may itself live under `target`).
        if rel.has_root() {
            return false;
        }
        rel.components().any(|component| match component {
            std::path::Component::Normal(dir) => dir
                .to_str()
                .map_or(false, |dir| INTERNAL_IGNORE_DIRS.contains(&dir)),
            _ => false,
        })
    }

    /// Returns `true` when `path` ends with one of the internally ignored
    /// database suffixes, compared case-insensitively.
    pub fn is_database_file(&self, path: &Path) -> bool {
        let lowered = path.to_string_lossy().to_lowercase();
        INTERNAL_IGNORE_EXTS
            .iter()
            .any(|ext| lowered.ends_with(*ext))
    }
}
/// Creates the "skipped" diagnostic for `path`.
///
/// The diagnostic carries the path relative to `root` when `path` lies under
/// it, and the full path otherwise.
pub fn skip_diagnostic(root: &Path, path: &Path, reason: SkipReason) -> WatchDiagnostic {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    WatchDiagnostic::skipped(shown.to_string_lossy().into_owned(), reason)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Files inside built-in ignored directories are reported as internal.
    #[test]
    fn test_internal_ignore_dirs() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let filter = FileFilter::new(root, &[], &[]).unwrap();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::create_dir_all(root.join("target")).unwrap();
        fs::create_dir_all(root.join("node_modules")).unwrap();
        fs::write(root.join(".git/config"), "test").unwrap();
        fs::write(root.join("target/lib.rs"), "fn test() {}").unwrap();
        fs::write(root.join("node_modules/index.js"), "test").unwrap();
        assert_eq!(
            filter.should_skip(&root.join(".git/config")),
            Some(SkipReason::IgnoredInternal)
        );
        assert_eq!(
            filter.should_skip(&root.join("target/lib.rs")),
            Some(SkipReason::IgnoredInternal)
        );
        assert_eq!(
            filter.should_skip(&root.join("node_modules/index.js")),
            Some(SkipReason::IgnoredInternal)
        );
    }

    /// Files with built-in ignored suffixes are reported as internal.
    #[test]
    fn test_internal_ignore_extensions() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let filter = FileFilter::new(root, &[], &[]).unwrap();
        fs::write(root.join("test.db"), "data").unwrap();
        fs::write(root.join("test.sqlite"), "data").unwrap();
        fs::write(root.join("test.db-journal"), "data").unwrap();
        assert_eq!(
            filter.should_skip(&root.join("test.db")),
            Some(SkipReason::IgnoredInternal)
        );
        assert_eq!(
            filter.should_skip(&root.join("test.sqlite")),
            Some(SkipReason::IgnoredInternal)
        );
        // The journal file was created above but previously never checked.
        assert_eq!(
            filter.should_skip(&root.join("test.db-journal")),
            Some(SkipReason::IgnoredInternal)
        );
    }

    /// Files whose language cannot be detected are skipped.
    #[test]
    fn test_unsupported_language() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let filter = FileFilter::new(root, &[], &[]).unwrap();
        fs::write(root.join("test.txt"), "text").unwrap();
        fs::write(root.join("Makefile"), "all:").unwrap();
        assert_eq!(
            filter.should_skip(&root.join("test.txt")),
            Some(SkipReason::UnsupportedLanguage)
        );
        assert_eq!(
            filter.should_skip(&root.join("Makefile")),
            Some(SkipReason::UnsupportedLanguage)
        );
    }

    /// Source files in supported languages pass the default filter.
    #[test]
    fn test_supported_language_not_skipped() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let filter = FileFilter::new(root, &[], &[]).unwrap();
        fs::write(root.join("test.rs"), "fn test() {}").unwrap();
        fs::write(root.join("test.py"), "def test(): pass").unwrap();
        assert_eq!(filter.should_skip(&root.join("test.rs")), None);
        assert_eq!(filter.should_skip(&root.join("test.py")), None);
    }

    /// `.gitignore` rules apply to files and to files inside ignored directories.
    #[test]
    fn test_gitignore_filtering() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::write(root.join(".gitignore"), "ignored.rs\nbuild/\n").unwrap();
        fs::write(root.join("ignored.rs"), "fn test() {}").unwrap();
        fs::write(root.join("included.rs"), "fn test() {}").unwrap();
        fs::create_dir_all(root.join("build")).unwrap();
        fs::write(root.join("build/output.rs"), "fn test() {}").unwrap();
        // The filter is created after `.gitignore` exists so its rules are loaded.
        let filter = FileFilter::new(root, &[], &[]).unwrap();
        assert_eq!(
            filter.should_skip(&root.join("ignored.rs")),
            Some(SkipReason::IgnoredByGitignore),
            "ignored.rs should be ignored by .gitignore pattern"
        );
        assert_eq!(
            filter.should_skip(&root.join("included.rs")),
            None,
            "included.rs should not be ignored"
        );
        assert_eq!(
            filter.should_skip(&root.join("build/output.rs")),
            Some(SkipReason::IgnoredByGitignore),
            "build/output.rs should be ignored by build/ pattern"
        );
    }

    /// With include globs, only matching files are kept.
    #[test]
    fn test_include_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::create_dir_all(root.join("tests")).unwrap();
        fs::write(root.join("src/lib.rs"), "fn test() {}").unwrap();
        fs::write(root.join("tests/test.rs"), "fn test() {}").unwrap();
        fs::write(root.join("main.rs"), "fn main() {}").unwrap();
        let filter = FileFilter::new(root, &["src/**".to_string()], &[]).unwrap();
        assert_eq!(filter.should_skip(&root.join("src/lib.rs")), None);
        assert_eq!(
            filter.should_skip(&root.join("tests/test.rs")),
            Some(SkipReason::ExcludedByGlob)
        );
        assert_eq!(
            filter.should_skip(&root.join("main.rs")),
            Some(SkipReason::ExcludedByGlob)
        );
    }

    /// Files matching an exclude glob are skipped.
    #[test]
    fn test_exclude_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "fn test() {}").unwrap();
        fs::write(root.join("src/test.rs"), "fn test() {}").unwrap();
        let filter = FileFilter::new(root, &[], &["**/*test*.rs".to_string()]).unwrap();
        assert_eq!(filter.should_skip(&root.join("src/lib.rs")), None);
        assert_eq!(
            filter.should_skip(&root.join("src/test.rs")),
            Some(SkipReason::ExcludedByGlob)
        );
    }

    /// A file must match an include glob and no exclude glob to be kept.
    #[test]
    fn test_include_and_exclude_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::create_dir_all(root.join("tests")).unwrap();
        fs::write(root.join("src/lib.rs"), "fn test() {}").unwrap();
        fs::write(root.join("src/test.rs"), "fn test() {}").unwrap();
        fs::write(root.join("tests/integration.rs"), "fn test() {}").unwrap();
        let filter =
            FileFilter::new(root, &["src/**".to_string()], &["**/*test*.rs".to_string()]).unwrap();
        assert_eq!(filter.should_skip(&root.join("src/lib.rs")), None);
        assert_eq!(
            filter.should_skip(&root.join("src/test.rs")),
            Some(SkipReason::ExcludedByGlob)
        );
        assert_eq!(
            filter.should_skip(&root.join("tests/integration.rs")),
            Some(SkipReason::ExcludedByGlob)
        );
    }

    /// `is_database_file` recognizes database suffixes and nothing else.
    #[test]
    fn test_is_database_file() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let filter = FileFilter::new(root, &[], &[]).unwrap();
        assert!(filter.is_database_file(Path::new("test.db")));
        assert!(filter.is_database_file(Path::new("test.sqlite")));
        assert!(filter.is_database_file(Path::new("test.db-journal")));
        assert!(!filter.is_database_file(Path::new("test.rs")));
        assert!(!filter.is_database_file(Path::new("database.rs")));
    }

    /// The diagnostic reports the path relative to the root.
    #[test]
    fn test_skip_diagnostic() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let diagnostic = skip_diagnostic(
            root,
            &root.join("target/lib.rs"),
            SkipReason::IgnoredInternal,
        );
        assert_eq!(diagnostic.path(), "target/lib.rs");
    }
}
/// Checks that `tests/algorithm_tests.rs`, addressed relative to the current
/// directory with `.` as the root, passes the default filter.
///
/// The check reads the real file system, so it depends on the working
/// directory the test is run from.
#[test]
fn test_tests_dir_not_skipped_by_default() {
    let candidate = std::path::Path::new("tests/algorithm_tests.rs");
    let default_filter = FileFilter::new(std::path::Path::new("."), &[], &[]).unwrap();
    let outcome = default_filter.should_skip(candidate);
    println!("tests/algorithm_tests.rs: {:?}", outcome);
    assert!(
        outcome.is_none(),
        "Expected tests/algorithm_tests.rs NOT to be skipped, got: {:?}",
        outcome
    );
}