use std::path::Path;
use memchr::memmem;
use roaring::RoaringBitmap;
use crate::path_util::path_bytes;
use super::{ByteSplitExt, PathIndex};
/// Outcome of [`build_filter`]: the ids of the files that passed every filter
/// that was applied.
pub struct PathFilter {
/// Bitmap holding the selected file ids.
pub file_ids: RoaringBitmap,
}
/// Builds the set of file ids selected by the optional type, exclusion and glob filters.
///
/// * `file_type` - keep only the ids `path_index.files_with_extension` reports for this
///   extension (an empty set when it reports nothing).
/// * `path_glob` - keep only visible paths accepted by [`matches_path_filter`].
/// * `exclude_type` - remove the ids `path_index.files_with_extension` reports for this
///   extension.
///
/// Returns `None` when no constraint was applied: either every argument is `None`, or only
/// `exclude_type` was given and `files_with_extension` returned `None` for it.
pub fn build_filter(
    path_index: &PathIndex,
    file_type: Option<&str>,
    exclude_type: Option<&str>,
    path_glob: Option<&str>,
) -> Option<PathFilter> {
    /// Intersects `incoming` with the running selection, or adopts it when there is none yet.
    fn narrow(current: Option<RoaringBitmap>, incoming: RoaringBitmap) -> RoaringBitmap {
        match current {
            Some(existing) => existing & &incoming,
            None => incoming,
        }
    }

    // `None` means "no constraint applied so far".
    let mut selected: Option<RoaringBitmap> = None;

    if let Some(ext) = file_type {
        let by_type = path_index
            .files_with_extension(ext)
            .cloned()
            .unwrap_or_default();
        selected = Some(narrow(selected.take(), by_type));
    }

    if path_glob.is_some() {
        let mut by_glob = RoaringBitmap::new();
        for (file_id, path) in path_index.visible_paths() {
            if matches_path_filter(path, file_type, exclude_type, path_glob) {
                by_glob.insert(file_id);
            }
        }
        selected = Some(narrow(selected.take(), by_glob));
    }

    let excluded = match exclude_type {
        Some(ext) => path_index.files_with_extension(ext),
        None => None,
    };
    if let Some(excluded) = excluded {
        // With no earlier constraint the exclusion starts from every visible file.
        let mut remaining = selected.take().unwrap_or_else(|| {
            let mut everything = RoaringBitmap::new();
            for (file_id, _) in path_index.visible_paths() {
                everything.insert(file_id);
            }
            everything
        });
        remaining -= excluded;
        selected = Some(remaining);
    }

    selected.map(|file_ids| PathFilter { file_ids })
}
/// Checks a single path against the optional type, exclusion and glob filters.
///
/// A path is accepted when all of the following hold (absent filters always hold):
/// * its extension equals `file_type` (ASCII case-insensitive),
/// * its extension does not equal `exclude_type` (ASCII case-insensitive),
/// * it satisfies `path_glob` according to [`path_matches_glob`].
pub(crate) fn matches_path_filter(
    path: &Path,
    file_type: Option<&str>,
    exclude_type: Option<&str>,
    path_glob: Option<&str>,
) -> bool {
    let raw = path_bytes(path);
    let raw: &[u8] = raw.as_ref();

    // `&&` keeps the original evaluation order and short-circuiting.
    file_type.map_or(true, |ext| path_has_extension(raw, ext.as_bytes()))
        && !exclude_type.map_or(false, |ext| path_has_extension(raw, ext.as_bytes()))
        && path_glob.map_or(true, |glob| path_matches_glob(path, glob))
}
/// Decides whether `path` satisfies the user-supplied `glob`.
///
/// The pattern is dispatched on its shape (an optional leading `**/` is stripped first):
///
/// * `*.ext` with a purely literal, single-segment `ext`: ASCII case-insensitive
///   comparison against the file name's last extension.
/// * any other pattern containing `*` or `?`: wildcard matching. Without a `/` the pattern
///   must match one whole path component; with a `/` it must match the whole path or a
///   suffix that starts right after a `/`.
/// * trailing `/` (no `**/` prefix): the path starts with that directory prefix, or contains
///   it immediately after a `/`.
/// * contains `/`: plain substring search.
/// * bare word: equals a path component, or a component with its last extension removed.
///
/// Wildcard patterns are matched byte-for-byte (case-sensitive); only the literal `*.ext`
/// fast path ignores ASCII case.
pub(crate) fn path_matches_glob(path: &Path, glob: &str) -> bool {
    /// Returns `true` when `pattern` contains a glob metacharacter.
    fn has_wildcard(pattern: &[u8]) -> bool {
        pattern.iter().any(|&b| b == b'*' || b == b'?')
    }

    /// For a pattern of the exact form `*.<literal>` returns the literal extension.
    ///
    /// Extensions containing wildcards, another `.` or a `/` are rejected: comparing them
    /// verbatim against the last extension of a file name could never succeed, so such
    /// patterns must go through the wildcard matcher instead.
    fn literal_extension(pattern: &[u8]) -> Option<&[u8]> {
        let ext = pattern.strip_prefix(b"*.")?;
        let is_literal = !ext
            .iter()
            .any(|&b| matches!(b, b'*' | b'?' | b'.' | b'/'));
        is_literal.then_some(ext)
    }

    /// Wildcard match: per component when the pattern has no `/`, per path suffix otherwise.
    fn wildcard_match(path: &[u8], pattern: &[u8]) -> bool {
        if pattern.contains(&b'/') {
            path_glob_matches(path, pattern)
        } else {
            path.split(|&b| b == b'/')
                .any(|component| glob_matches_bytes(component, pattern))
        }
    }

    let path_bytes = path_bytes(path);
    let path: &[u8] = path_bytes.as_ref();
    let glob = glob.as_bytes();

    if let Some(ext) = literal_extension(glob) {
        return path_has_extension(path, ext);
    }

    if let Some(rest) = glob.strip_prefix(b"**/") {
        if let Some(ext) = literal_extension(rest) {
            return path_has_extension(path, ext);
        }
        // Wildcards after `**/` must be interpreted, not searched for as literal bytes.
        if has_wildcard(rest) {
            return wildcard_match(path, rest);
        }
        if !rest.contains(&b'/') {
            return path_has_component(path, rest);
        }
        return memmem::find(path, rest).is_some();
    }

    if has_wildcard(glob) {
        return wildcard_match(path, glob);
    }

    if glob.ends_with(b"/") {
        return path.starts_with(glob) || memmem::find(path, &[b"/", glob].concat()).is_some();
    }
    if glob.contains(&b'/') {
        return memmem::find(path, glob).is_some();
    }
    path_has_component(path, glob)
}
fn path_glob_matches(path: &[u8], glob: &[u8]) -> bool {
if glob_matches_bytes(path, glob) {
return true;
}
path.iter()
.enumerate()
.filter_map(|(idx, byte)| (*byte == b'/').then_some(idx + 1))
.any(|start| glob_matches_bytes(&path[start..], glob))
}
/// Matches the whole of `text` against `pattern`.
///
/// `*` matches any run of bytes (including none, and including `/`), `?` matches exactly
/// one byte, every other byte matches itself. There is no escape syntax.
///
/// Implemented as a greedy scan with single-star backtracking: on a mismatch the most
/// recent `*` is made to absorb one more byte of `text` and matching resumes after it.
fn glob_matches_bytes(text: &[u8], pattern: &[u8]) -> bool {
    let mut text_idx = 0usize;
    let mut pattern_idx = 0usize;
    // (index of the most recent `*` in `pattern`, index in `text` where it currently ends)
    let mut backtrack: Option<(usize, usize)> = None;

    while text_idx < text.len() {
        match pattern.get(pattern_idx) {
            // The wildcard must be recognised before the literal comparison: otherwise a
            // `*` byte in `text` is consumed as a plain literal and no backtrack point is
            // recorded, so e.g. `*a` would fail to match `*ba`.
            Some(b'*') => {
                backtrack = Some((pattern_idx, text_idx));
                pattern_idx += 1;
            }
            Some(&expected) if expected == b'?' || expected == text[text_idx] => {
                text_idx += 1;
                pattern_idx += 1;
            }
            _ => {
                let Some((star, resume)) = backtrack else {
                    return false;
                };
                // Let the last `*` swallow one more byte and retry from there.
                backtrack = Some((star, resume + 1));
                pattern_idx = star + 1;
                text_idx = resume + 1;
            }
        }
    }

    // Text is exhausted: only trailing `*`s may remain in the pattern.
    pattern[pattern_idx..].iter().all(|&b| b == b'*')
}
/// Returns `true` when the last `/`-separated component of `path` contains a `.` and the
/// bytes after its final `.` equal `ext`, ignoring ASCII case.
fn path_has_extension(path: &[u8], ext: &[u8]) -> bool {
    path.rsplit(|&byte| byte == b'/')
        .next()
        .and_then(|file_name| ByteSplitExt::rsplit_once(file_name, |&byte| byte == b'.'))
        .map_or(false, |(_, found)| ascii_eq_ignore_case(found, ext))
}
/// Returns `true` when some `/`-separated component of `path` is exactly `word`, or becomes
/// `word` once everything from its final `.` onwards is removed.
fn path_has_component(path: &[u8], word: &[u8]) -> bool {
    path.split(|&byte| byte == b'/').any(|segment| {
        segment == word
            || ByteSplitExt::rsplit_once(segment, |&byte| byte == b'.')
                .map_or(false, |(stem, _)| stem == word)
    })
}
/// Compares two byte strings for equality, treating ASCII letters case-insensitively.
/// Slices of different length are never equal; non-ASCII bytes must match exactly.
fn ascii_eq_ignore_case(left: &[u8], right: &[u8]) -> bool {
    left.eq_ignore_ascii_case(right)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture index over six files: three `.rs` (two of them under `src/`), plus one
    /// `.py`, one `.md` and one `.js`.
    fn make_index() -> PathIndex {
        let paths: Vec<std::path::PathBuf> = [
            "src/main.rs",
            "src/lib.rs",
            "src/util.py",
            "tests/test_main.rs",
            "docs/readme.md",
            "scripts/build.js",
        ]
        .iter()
        .map(|raw| std::path::PathBuf::from(*raw))
        .collect();
        PathIndex::build(&paths)
    }

    /// Runs `build_filter` against the fixture index.
    fn filter_fixture(
        file_type: Option<&str>,
        exclude_type: Option<&str>,
        path_glob: Option<&str>,
    ) -> Option<PathFilter> {
        build_filter(&make_index(), file_type, exclude_type, path_glob)
    }

    /// Shorthand for `path_matches_glob` on a UTF-8 path literal.
    fn glob_hits(path: &str, glob: &str) -> bool {
        path_matches_glob(Path::new(path), glob)
    }

    #[test]
    fn filter_by_extension() {
        let selected = filter_fixture(Some("rs"), None, None).unwrap().file_ids;
        assert_eq!(selected.len(), 3);
    }

    #[test]
    fn filter_by_path_glob() {
        let selected = filter_fixture(None, None, Some("src/")).unwrap().file_ids;
        assert_eq!(selected.len(), 3);
    }

    #[test]
    fn filter_combined_type_and_path() {
        let selected = filter_fixture(Some("rs"), None, Some("src/"))
            .unwrap()
            .file_ids;
        assert_eq!(selected.len(), 2);
    }

    #[test]
    fn filter_exclude_type() {
        let selected = filter_fixture(None, Some("js"), None).unwrap().file_ids;
        assert_eq!(selected.len(), 5);
    }

    #[test]
    fn no_filter_returns_none() {
        assert!(filter_fixture(None, None, None).is_none());
    }

    #[test]
    fn glob_star_extension() {
        assert!(glob_hits("src/main.rs", "*.rs"));
        assert!(!glob_hits("src/main.py", "*.rs"));
    }

    #[test]
    fn glob_double_star_extension() {
        assert!(glob_hits("deep/nested/file.rs", "**/*.rs"));
        assert!(!glob_hits("deep/nested/file.py", "**/*.rs"));
    }

    #[test]
    fn matches_path_filter_combines_type_and_glob() {
        // Same filter throughout: Rust files located under `src/`.
        let rust_in_src =
            |path: &str| matches_path_filter(Path::new(path), Some("rs"), None, Some("src/"));

        assert!(rust_in_src("src/main.rs"));
        assert!(!rust_in_src("src/main.py"));
        assert!(!rust_in_src("tests/main.rs"));
    }

    #[test]
    fn bare_word_glob_requires_component_boundary() {
        assert!(glob_hits("test/foo.rs", "test"));
        assert!(glob_hits("src/test.rs", "test"));
        assert!(glob_hits("src/test/util.rs", "test"));
        assert!(!glob_hits("src/contest.rs", "test"));
        assert!(!glob_hits("src/testing.rs", "test"));
    }

    #[test]
    fn path_with_slash_still_uses_substring() {
        assert!(glob_hits("src/test/foo.rs", "src/test"));
        assert!(!glob_hits("lib/test/foo.rs", "src/test"));
    }

    #[test]
    fn wildcard_glob_matches_file_component() {
        assert!(glob_hits("tests/search_tests.rs", "*tests.rs"));
        assert!(!glob_hits("tests/search.rs", "*tests.rs"));
    }

    #[test]
    fn wildcard_glob_with_slash_matches_component_suffix() {
        assert!(glob_hits("vendor/lib.rs", "vendor/**"));
        assert!(glob_hits("src/vendor/lib.rs", "vendor/**"));
        assert!(!glob_hits("src/not_vendor/lib.rs", "vendor/**"));
    }

    #[test]
    fn double_star_slash_bare_word_requires_component_boundary() {
        assert!(
            !glob_hits("src/contest.rs", "**/test"),
            "**/test must not match 'contest.rs' (substring, not component)"
        );
        assert!(
            !glob_hits("src/testing.rs", "**/test"),
            "**/test must not match 'testing.rs'"
        );
        assert!(
            glob_hits("test/foo.rs", "**/test"),
            "**/test must match 'test/foo.rs'"
        );
        assert!(
            glob_hits("src/test.rs", "**/test"),
            "**/test must match 'src/test.rs' (stem matches component)"
        );
        assert!(
            glob_hits("src/test/util.rs", "**/test"),
            "**/test must match when test is a directory component"
        );
    }

    #[test]
    fn double_star_slash_with_slash_still_uses_substring() {
        assert!(glob_hits("deep/src/test/util.rs", "**/src/test"));
        assert!(!glob_hits("deep/lib/test/util.rs", "**/src/test"));
    }

    #[cfg(unix)]
    #[test]
    fn non_utf8_paths_participate_in_extension_and_glob_filters() {
        use std::ffi::OsString;
        use std::os::unix::ffi::OsStringExt;

        // `\xff` makes the file name invalid UTF-8.
        let raw_name = b"src/odd\xff.rs".to_vec();
        let odd_path = std::path::PathBuf::from(OsString::from_vec(raw_name));

        assert!(matches_path_filter(&odd_path, Some("rs"), None, Some("src/")));
        assert!(path_matches_glob(&odd_path, "*.rs"));
        assert!(path_matches_glob(&odd_path, "src/"));
    }

    #[test]
    fn byte_split_ext_no_sep() {
        let input: &[u8] = b"nodot";
        assert!(ByteSplitExt::rsplit_once(input, |&b| b == b'.').is_none());
    }

    #[test]
    fn byte_split_ext_last_sep() {
        let input: &[u8] = b"foo.bar.baz";
        let (before, after) = ByteSplitExt::rsplit_once(input, |&b| b == b'.').unwrap();
        assert_eq!(before, b"foo.bar");
        assert_eq!(after, b"baz");
    }
}