use std::cell::RefCell;
use std::path::{Path, PathBuf};
use ahash::{AHashMap, AHashSet};
use globset::{Glob, GlobSetBuilder};
use ignore::WalkBuilder;
use crate::config::Config;
use crate::scanner::{ScanError, ScanSource, submodule_roots_for_source};
/// Path filters compiled from the scan configuration.
///
/// Combines include/exclude glob sets with the list of submodule roots to
/// skip; `Filters::allows` is the single decision point over a
/// root-relative, `/`-separated path.
pub(crate) struct Filters {
/// Globs a relative path must match to be accepted.
include: globset::GlobSet,
/// Globs that reject a relative path; checked before everything else.
exclude: globset::GlobSet,
/// Copied verbatim from `config.scan.max_file_bytes`; not consulted by
/// `allows` itself.
pub(crate) max_file_bytes: u64,
/// Submodule roots with trailing `/` stripped. Empty when
/// `config.scan.skip_submodules` is off.
submodule_roots: Vec<String>,
/// `submodule_roots` with a trailing `/` appended, index-aligned with it,
/// precomputed so `allows` does not allocate per call.
submodule_prefixes: Vec<String>,
/// Copied verbatim from `config.scan.eager_l2`; not consulted in this file.
pub(crate) eager_l2: bool,
}
impl Filters {
    /// Builds the filter set from `config` and the repository's submodule roots.
    ///
    /// Submodule roots are only retained when `config.scan.skip_submodules` is
    /// set; each one has its trailing `/` characters removed and entries that
    /// end up empty are discarded. A matching `"<root>/"` prefix is precomputed
    /// for every retained root.
    ///
    /// # Errors
    ///
    /// Returns the `ScanError` produced by `compile_globs` when an include or
    /// exclude pattern fails to compile.
    pub(crate) fn build(config: &Config, submodule_roots: Vec<String>) -> Result<Self, ScanError> {
        let include = compile_globs(&config.scan.include)?;
        let exclude = compile_globs(&config.scan.exclude)?;

        let mut roots: Vec<String> = Vec::new();
        if config.scan.skip_submodules {
            for raw in submodule_roots {
                let trimmed = raw.trim_end_matches('/');
                if !trimmed.is_empty() {
                    roots.push(trimmed.to_string());
                }
            }
        }

        // Kept index-aligned with `roots`; `allows` zips the two together.
        let mut prefixes: Vec<String> = Vec::with_capacity(roots.len());
        for root in &roots {
            prefixes.push(format!("{root}/"));
        }

        Ok(Self {
            include,
            exclude,
            max_file_bytes: config.scan.max_file_bytes,
            submodule_roots: roots,
            submodule_prefixes: prefixes,
            eager_l2: config.scan.eager_l2,
        })
    }

    /// Decides whether the root-relative path `rel` passes the glob filters.
    ///
    /// A path is rejected when it matches an exclude glob, or when it is a
    /// submodule root or lies beneath one. Otherwise the result is whether it
    /// matches an include glob.
    pub(crate) fn allows(&self, rel: &str) -> bool {
        if self.exclude.is_match(rel) {
            return false;
        }
        let inside_submodule = self
            .submodule_roots
            .iter()
            .zip(&self.submodule_prefixes)
            .any(|(root, prefix)| rel == root || rel.starts_with(prefix.as_str()));
        !inside_submodule && self.include.is_match(rel)
    }
}
/// Compiles `patterns` into a single `GlobSet`.
///
/// # Errors
///
/// Returns `ScanError::BadGlob` when an individual pattern is invalid (the
/// message contains the debug-quoted pattern followed by the parser error) or
/// when the combined set cannot be built.
fn compile_globs(patterns: &[String]) -> Result<globset::GlobSet, ScanError> {
    let mut builder = GlobSetBuilder::new();
    for p in patterns {
        match Glob::new(p) {
            Ok(glob) => {
                builder.add(glob);
            }
            Err(e) => return Err(ScanError::BadGlob(format!("{p:?}: {e}"))),
        }
    }
    match builder.build() {
        Ok(set) => Ok(set),
        Err(e) => Err(ScanError::BadGlob(format!("{e}"))),
    }
}
/// Creates the `WalkBuilder` configuration used for walking `dir`.
///
/// All of the walker's standard filters are switched on or off together
/// according to `respect_gitignore`, and the gitignore / git-exclude toggles
/// are then set to the same value explicitly. Two settings are fixed
/// regardless of the flag: symlinks are not followed, and the hidden-file
/// filter is disabled.
pub(crate) fn ignore_walk_builder(dir: &Path, respect_gitignore: bool) -> WalkBuilder {
    let mut builder = WalkBuilder::new(dir);
    builder.standard_filters(respect_gitignore);
    builder.git_ignore(respect_gitignore);
    builder.git_exclude(respect_gitignore);
    // Must come after `standard_filters`, which also sets the hidden filter.
    builder.hidden(false);
    builder.follow_links(false);
    builder
}
/// Decides whether an absolute path under a fixed root should be indexed,
/// combining the glob-based `Filters` with an optional gitignore check.
pub(crate) struct IndexFilter {
/// Glob and submodule filters applied to the root-relative path.
filters: Filters,
/// Directory that absolute paths are made relative to.
root: PathBuf,
/// Copied from `config.scan.respect_gitignore`; when false, the gitignore
/// check in `is_indexable` is skipped entirely.
respect_gitignore: bool,
/// Memo for the gitignore check: maps a directory to the set of child paths
/// that a depth-1 walk of that directory yielded. Emptied by `clear_cache`.
allowed_children: RefCell<AHashMap<PathBuf, AHashSet<PathBuf>>>,
}
impl IndexFilter {
    /// Creates a filter for the working tree rooted at `root`.
    ///
    /// Submodule roots are looked up via `submodule_roots_for_source` for
    /// `ScanSource::WorkingTree` and handed to `Filters::build`.
    ///
    /// # Errors
    ///
    /// Propagates the `ScanError` returned by `Filters::build`.
    pub(crate) fn new(root: &Path, config: &Config) -> Result<Self, ScanError> {
        let roots = submodule_roots_for_source(root, &ScanSource::WorkingTree);
        Ok(Self {
            filters: Filters::build(config, roots)?,
            root: root.to_path_buf(),
            respect_gitignore: config.scan.respect_gitignore,
            allowed_children: RefCell::new(AHashMap::new()),
        })
    }

    /// Drops every memoized directory listing, so later gitignore checks walk
    /// the filesystem again.
    pub(crate) fn clear_cache(&self) {
        let mut cache = self.allowed_children.borrow_mut();
        cache.clear();
    }

    /// Applies only the glob/submodule filters to the root-relative path `rel`;
    /// gitignore rules are not consulted.
    pub(crate) fn allows_glob(&self, rel: &str) -> bool {
        self.filters.allows(rel)
    }

    /// Borrows the underlying glob/submodule filters.
    pub(crate) fn filters(&self) -> &Filters {
        &self.filters
    }

    /// Converts `abs` into a `/`-separated path relative to the root.
    ///
    /// Returns `None` when `abs` is not under the root or is the root itself
    /// (empty relative path). Backslashes are rewritten to `/` and non-UTF-8
    /// sequences are replaced lossily.
    fn rel_of(&self, abs: &Path) -> Option<String> {
        abs.strip_prefix(&self.root)
            .ok()
            .map(|tail| tail.to_string_lossy().replace('\\', "/"))
            .filter(|tail| !tail.is_empty())
    }

    /// Returns whether the absolute path `abs` should be indexed.
    ///
    /// The path must lie strictly under the root and pass the glob filters;
    /// when `respect_gitignore` is set it must additionally survive the
    /// gitignore check.
    pub(crate) fn is_indexable(&self, abs: &Path) -> bool {
        match self.rel_of(abs) {
            Some(rel) if self.filters.allows(&rel) => {
                !self.respect_gitignore || self.gitignore_allows(abs)
            }
            _ => false,
        }
    }

    /// Checks `abs` against ignore rules one path component at a time.
    ///
    /// Starting at the root, each directory on the way down to `abs` is listed
    /// with `shallow_allowed_children` (memoized per directory); the path is
    /// accepted only if every component appears in its parent's listing.
    /// Paths outside the root are rejected.
    fn gitignore_allows(&self, abs: &Path) -> bool {
        let Ok(rel) = abs.strip_prefix(&self.root) else {
            return false;
        };
        let mut cache = self.allowed_children.borrow_mut();
        let mut parent = self.root.clone();
        for component in rel.components() {
            let candidate = parent.join(component.as_os_str());
            let listing = cache
                .entry(parent.clone())
                .or_insert_with(|| shallow_allowed_children(&parent, self.respect_gitignore));
            if !listing.contains(&candidate) {
                return false;
            }
            parent = candidate;
        }
        true
    }
}
/// Lists the immediate children of `dir` that survive the walker's filters.
///
/// Runs a depth-1 walk configured by `ignore_walk_builder` and returns the
/// path of every yielded entry except `dir` itself. Entries the walker
/// reports as errors are skipped.
///
/// Ignore files in ancestor directories are consulted whenever
/// `respect_gitignore` is set. Callers list one directory at a time, so
/// without this an unanchored rule in an ancestor's `.gitignore` (for example
/// `*.log` at the repository root) would never be applied to files in a
/// sub-directory: the ancestor's own listing only vets the sub-directory
/// entry, not its contents.
// NOTE(review): with the `ignore` crate's default `require_git`, gitignore
// rules appear to apply only once a `.git` entry has been found among the
// directories the walker knows about; reading parents is what lets a walk
// started in a sub-directory discover the repository root. Confirm against
// the crate documentation.
fn shallow_allowed_children(dir: &Path, respect_gitignore: bool) -> AHashSet<PathBuf> {
    let walker = ignore_walk_builder(dir, respect_gitignore)
        .parents(respect_gitignore)
        .max_depth(Some(1))
        .build();

    let mut children = AHashSet::new();
    for entry in walker.flatten() {
        let path = entry.path();
        // The walker yields the starting directory itself at depth 0.
        if path != dir {
            children.insert(path.to_path_buf());
        }
    }
    children
}
// Filesystem-backed tests for `IndexFilter`: each test builds a small
// throwaway tree and checks `is_indexable` / `allows_glob` against it.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
// Builds a temporary root containing an empty `.git` directory, lets `body`
// populate it, then constructs an `IndexFilter` from the default config for
// that root. The `TempDir` guard is returned so the directory stays alive
// for as long as the caller holds it.
// NOTE(review): the root is canonicalized, presumably so paths joined onto
// it compare equal to what the walker reports — confirm.
fn filter_for(body: impl FnOnce(&Path)) -> (IndexFilter, PathBuf, tempfile::TempDir) {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path().canonicalize().expect("canonicalize");
fs::create_dir_all(root.join(".git")).expect("mkdir .git");
body(&root);
let config = crate::config::default_for_root(&root);
let filter = IndexFilter::new(&root, &config).expect("build filter");
(filter, root, tmp)
}
// A `.gitignore` that lives inside `sub/` must hide `sub/ignored.rs` while
// leaving its sibling `sub/kept.rs` indexable.
#[test]
fn should_reject_path_under_nested_gitignore_rule() {
let (filter, root, _tmp) = filter_for(|root| {
fs::create_dir_all(root.join("sub")).unwrap();
fs::write(root.join("sub/.gitignore"), b"ignored.rs\n").unwrap();
fs::write(root.join("sub/ignored.rs"), b"fn a() {}\n").unwrap();
fs::write(root.join("sub/kept.rs"), b"fn b() {}\n").unwrap();
});
assert!(
!filter.is_indexable(&root.join("sub/ignored.rs")),
"a file matched by its own dir's nested .gitignore must be rejected"
);
assert!(
filter.is_indexable(&root.join("sub/kept.rs")),
"a tracked sibling must be kept"
);
}
// The root `.gitignore` ignores the `build/` directory; a file two levels
// below it must be rejected even though no rule names the file itself.
#[test]
fn should_reject_path_when_ancestor_directory_is_gitignored() {
let (filter, root, _tmp) = filter_for(|root| {
fs::write(root.join(".gitignore"), b"build/\n").unwrap();
fs::create_dir_all(root.join("build/nested")).unwrap();
fs::write(root.join("build/nested/out.rs"), b"fn c() {}\n").unwrap();
fs::write(root.join("main.rs"), b"fn main() {}\n").unwrap();
});
assert!(
!filter.is_indexable(&root.join("build/nested/out.rs")),
"a file under an ancestor-gitignored directory must be rejected"
);
assert!(
filter.is_indexable(&root.join("main.rs")),
"a tracked top-level file must be kept"
);
}
// No `.gitignore` is written here: `.basemind` contents at the root and in a
// nested directory are expected to be rejected by the default config alone.
// `allows_glob` exercises the glob filters without the gitignore check.
#[test]
fn should_reject_root_and_nested_basemind_via_default_exclude() {
let (filter, root, _tmp) = filter_for(|root| {
fs::create_dir_all(root.join(".basemind")).unwrap();
fs::write(root.join(".basemind/x.msgpack"), b"\x00").unwrap();
fs::create_dir_all(root.join("child/.basemind")).unwrap();
fs::write(root.join("child/.basemind/y.msgpack"), b"\x00").unwrap();
fs::write(root.join("child/real.rs"), b"fn d() {}\n").unwrap();
});
assert!(!filter.allows_glob(".basemind/x.msgpack"));
assert!(!filter.allows_glob("child/.basemind/y.msgpack"));
assert!(!filter.is_indexable(&root.join(".basemind/x.msgpack")));
assert!(!filter.is_indexable(&root.join("child/.basemind/y.msgpack")));
assert!(
filter.is_indexable(&root.join("child/real.rs")),
"a real source file beside a nested .basemind must still be kept"
);
}
// The root itself yields an empty relative path and a path outside the root
// cannot be made relative at all; both must be rejected.
#[test]
fn should_reject_out_of_root_and_empty_rel() {
let (filter, root, _tmp) = filter_for(|root| {
fs::write(root.join("a.rs"), b"fn e() {}\n").unwrap();
});
assert!(!filter.is_indexable(&root));
assert!(!filter.is_indexable(Path::new("/definitely/not/under/root.rs")));
}
}