use std::collections::HashSet;
use std::path::Path;
use std::sync::OnceLock;
use gitignores::GitIgnore;
use ignore::gitignore::{Gitignore, GitignoreBuilder};
/// A junk-file hit returned by [`check`].
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log,
/// duplicate and compare hits (for example inside `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Match {
    /// The ignore pattern that caused the path to be flagged, e.g. `*.pyc`
    /// or `node_modules/`.
    pub pattern: String,
}
/// Template patterns that must not become junk rules.
///
/// While `build_matcher` reads the `gitignores::Global` templates, any line
/// whose trimmed text is exactly equal to one of these entries is skipped.
/// The comparison is a literal string comparison, not a glob match, so only
/// these precise spellings are dropped.
// NOTE(review): presumably these are patterns that would also hit legitimate
// data files (logs, archives, databases) — confirm the intent with the author.
const BLOCKLIST: &[&str] = &[
"out/",
"dist/",
"*_archive",
"*.log",
"*.gz",
"*.zip",
"*.pdf",
"*.tar",
"*.bak",
"*.orig",
"*.rar",
"*.db",
"*.sql",
"*.sqlite",
"*.sqlite3",
"tags",
"TAGS",
"*.idb",
"*.tmp",
"*.temp",
];
/// Patterns that `build_matcher` always adds to the matcher.
///
/// They are appended after the template-derived lines and are added
/// directly, without being filtered through `BLOCKLIST`.
const HARDCODED: &[&str] = &[
"*.pyc",
"*.pyo",
"__pycache__/",
".ipynb_checkpoints/",
"node_modules/",
".git/",
".gitignore",
".gitattributes",
".DS_Store",
"Thumbs.db",
"desktop.ini",
"~$*",
"$~*",
"*.swp",
"*.swo",
"*~",
];
/// Assembles the junk matcher from the `gitignores::Global` templates plus
/// the [`HARDCODED`] patterns.
///
/// Template lines are trimmed, then dropped when they are blank, comments
/// (`#`), negations (`!`) or listed verbatim in [`BLOCKLIST`]. Lines the
/// builder rejects are skipped on purpose (best effort).
///
/// # Panics
///
/// Panics if the final `GitignoreBuilder::build` call fails.
fn build_matcher() -> Gitignore {
    let skipped: HashSet<&str> = BLOCKLIST.iter().copied().collect();
    let mut builder = GitignoreBuilder::new("/");

    for name in gitignores::Global::list() {
        if let Some(template) = gitignores::Global::get(name) {
            let content = template.contents();
            let usable = content
                .lines()
                .map(|raw| raw.trim())
                .filter(|entry| !entry.is_empty())
                .filter(|entry| !entry.starts_with('#') && !entry.starts_with('!'))
                .filter(|entry| !skipped.contains(entry));
            for entry in usable {
                // Best effort: a line the builder cannot parse is ignored.
                let _ = builder.add_line(None, entry);
            }
        }
    }

    for pattern in HARDCODED.iter().copied() {
        let _ = builder.add_line(None, pattern);
    }

    builder.build().expect("failed to build junk matcher")
}
/// Returns the shared junk matcher, building it with [`build_matcher`] the
/// first time it is requested and reusing that instance afterwards.
fn global() -> &'static Gitignore {
    static MATCHER: OnceLock<Gitignore> = OnceLock::new();

    MATCHER.get_or_init(build_matcher)
}
/// Decides whether `path` looks like junk.
///
/// The path (and each of its parents) is first tested against the shared
/// matcher from [`global`]; a path whose textual form ends in `/` is
/// treated as a directory. When no pattern ignores it, the result of
/// [`check_dotfile`] is returned instead.
///
/// Returns `Some(Match)` carrying the responsible pattern, or `None` when
/// the path is not flagged.
pub fn check(path: &Path) -> Option<Match> {
    let treat_as_dir = path.to_string_lossy().ends_with('/');
    let verdict = global().matched_path_or_any_parents(path, treat_as_dir);

    if !verdict.is_ignore() {
        return check_dotfile(path);
    }

    // Prefer the original text of the glob that matched; fall back to a
    // synthesized pattern if the matcher does not expose one.
    let pattern = match verdict.inner() {
        Some(glob) => glob.original().to_string(),
        None => suggest_pattern(path),
    };
    Some(Match { pattern })
}
/// Flags paths whose first real component is a dot-prefixed name.
///
/// A leading `./` is skipped so that `./data.csv` is not mistaken for a
/// dotfile and `./.env` is reported as `.env`. Root, prefix and `..`
/// components are never treated as dotfiles. `.quiltignore` is exempt.
///
/// Returns `Some(Match)` whose pattern is the component name, with a
/// trailing `/` when further components follow it (i.e. it is a
/// directory), or `None` when the path is not flagged or the name is not
/// valid UTF-8.
fn check_dotfile(path: &Path) -> Option<Match> {
    // `components()` keeps a leading `.`; drop it so the first *named*
    // component is inspected rather than the current-directory marker.
    let mut parts = path
        .components()
        .skip_while(|part| matches!(part, std::path::Component::CurDir));

    let name = match parts.next()? {
        std::path::Component::Normal(os) => os.to_str()?,
        // `/`, a Windows prefix, or `..` — not a hidden file or directory.
        _ => return None,
    };

    if !name.starts_with('.') || name == ".quiltignore" {
        return None;
    }

    // Anything after the dot-prefixed component means it is a directory.
    let pattern = if parts.next().is_some() {
        format!("{name}/")
    } else {
        name.to_owned()
    };
    Some(Match { pattern })
}
/// Reports whether the single gitignore-style `pattern` ignores `path` or
/// any of its parent directories.
///
/// `path` itself is always tested as a file. Returns `false` when the
/// pattern cannot be added to the builder or the matcher cannot be built.
pub fn pattern_matches(pattern: &str, path: &str) -> bool {
    let mut builder = GitignoreBuilder::new("/");
    let compiled = builder
        .add_line(None, pattern)
        .and_then(|ready| ready.build());

    match compiled {
        Ok(matcher) => matcher
            .matched_path_or_any_parents(Path::new(path), false)
            .is_ignore(),
        Err(_) => false,
    }
}
/// Synthesizes an ignore pattern for `path`.
///
/// Preference order: `*.<ext>` when the path has an extension, otherwise
/// the final file name, otherwise the whole path rendered lossily.
fn suggest_pattern(path: &Path) -> String {
    match (path.extension(), path.file_name()) {
        (Some(ext), _) => format!("*.{}", ext.to_string_lossy()),
        (None, Some(file)) => file.to_string_lossy().into_owned(),
        (None, None) => path.to_string_lossy().into_owned(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Runs [`check`] on `raw` and yields the reported pattern, if any.
    fn flagged(raw: &str) -> Option<String> {
        check(Path::new(raw)).map(|hit| hit.pattern)
    }

    /// Asserts that `raw` is flagged with exactly `expected`.
    fn assert_flagged(raw: &str, expected: &str) {
        assert_eq!(flagged(raw).as_deref(), Some(expected));
    }

    /// Asserts that `raw` is not flagged at all.
    fn assert_clean(raw: &str) {
        assert!(flagged(raw).is_none());
    }

    #[test]
    fn detects_python_bytecode() {
        assert_flagged("data/clean.pyc", "*.pyc");
    }

    #[test]
    fn detects_pycache_file() {
        // Either the extension rule or the directory rule may win.
        let pattern = flagged("__pycache__/module.cpython-311.pyc");
        assert!(matches!(
            pattern.as_deref(),
            Some("*.pyc" | "__pycache__/")
        ));
    }

    #[test]
    fn detects_ds_store() {
        assert_flagged(".DS_Store", ".DS_Store");
    }

    #[test]
    fn detects_node_modules_file() {
        assert_flagged("node_modules/package/index.js", "node_modules/");
    }

    #[test]
    fn detects_editor_swap_files() {
        assert_flagged("data.csv.swp", "*.swp");
    }

    #[test]
    fn detects_office_temp_files() {
        assert_flagged("~$report.xlsx", "~$*");
    }

    #[test]
    fn detects_git_file() {
        assert_flagged(".git/config", ".git/");
    }

    #[test]
    fn detects_gitignore_file() {
        assert_flagged(".gitignore", ".gitignore");
    }

    #[test]
    fn does_not_flag_parquet() {
        assert_clean("data/results.parquet");
    }

    #[test]
    fn does_not_flag_csv() {
        assert_clean("data.csv");
    }

    #[test]
    fn does_not_flag_json() {
        assert_clean("config.json");
    }

    #[test]
    fn does_not_flag_xlsx() {
        assert_clean("report.xlsx");
    }

    #[test]
    fn does_not_flag_txt() {
        assert_clean("readme.txt");
    }

    #[test]
    fn blocklisted_patterns_do_not_match() {
        for raw in ["server.log", "archive.zip"] {
            assert_clean(raw);
        }
    }

    #[test]
    fn detects_dotfile_env() {
        assert_flagged(".env", ".env");
    }

    #[test]
    fn detects_dotfile_directory() {
        assert_flagged(".myconfig/foo.toml", ".myconfig/");
    }

    #[test]
    fn does_not_flag_quiltignore() {
        assert_clean(".quiltignore");
    }

    #[test]
    fn does_not_flag_nested_dotfile() {
        assert_clean("data/.hidden");
    }

    #[test]
    fn pattern_matches_works() {
        // (pattern, path, expected result)
        let cases = [
            ("*.pyc", "data/clean.pyc", true),
            ("*.pyc", "data/clean.py", false),
            ("data/*.csv", "data/file.csv", true),
            ("data/*.csv", "other/file.csv", false),
        ];
        for (pattern, path, expected) in cases {
            assert_eq!(pattern_matches(pattern, path), expected);
        }
    }
}