use crate::{Error, Result, ScanConfig, ScanEntry};
use globset::GlobSet;
use std::path::Path;
use walkdir::WalkDir;
/// Directory scanner that pairs a [`ScanConfig`] with the exclude globs
/// compiled from it.
pub struct Scanner {
    /// Settings supplied to [`Scanner::new`].
    config: ScanConfig,
    /// Glob set built from `config.excludes`; matching paths are skipped
    /// while walking.
    excludes: GlobSet,
}
impl Scanner {
pub fn new(config: ScanConfig) -> Result<Self> {
let mut builder = globset::GlobSetBuilder::new();
for glob in &config.excludes {
builder.add(glob.clone());
}
let excludes = builder
.build()
.map_err(|e| Error::InvalidExclude(e.to_string()))?;
Ok(Self { config, excludes })
}
pub fn walk<P: AsRef<Path>>(&self, root: P) -> ScanWalkIter<'_> {
let walker = WalkDir::new(root.as_ref())
.follow_links(self.config.follow_symlinks)
.into_iter();
ScanWalkIter {
scanner: self,
inner: walker,
}
}
#[must_use]
pub fn config(&self) -> &ScanConfig {
&self.config
}
#[cfg(feature = "watch")]
pub(crate) fn excludes_for_watch(&self) -> &GlobSet {
&self.excludes
}
}
/// Iterator returned by [`Scanner::walk`]; yields `Result<ScanEntry>` items.
pub struct ScanWalkIter<'a> {
    /// Scanner whose configuration and exclude set drive the filtering.
    scanner: &'a Scanner,
    /// Underlying `walkdir` traversal.
    inner: walkdir::IntoIter,
}
impl Iterator for ScanWalkIter<'_> {
    type Item = Result<ScanEntry>;

    /// Advances the walk to the next entry that passes every filter.
    ///
    /// Filtering rules, applied in order:
    /// - directories are never yielded; a directory matching the exclude set
    ///   additionally has its whole subtree skipped;
    /// - entries whose file type is a symlink are skipped;
    /// - entries matching the exclude set are skipped;
    /// - entries strictly larger than `max_file_size_bytes` (when set) are
    ///   skipped.
    ///
    /// The yielded `extension` is ASCII-lowercased and is the empty string
    /// when the path has no extension or the extension is not valid UTF-8.
    /// `modified` is `None` when the modification time is unavailable.
    ///
    /// # Errors
    ///
    /// Yields `Some(Err(_))` when the underlying walker reports an error or
    /// when an entry's metadata cannot be read. Iteration may be continued
    /// after an error item.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let dir_entry = match self.inner.next()? {
                Ok(de) => de,
                Err(e) => return Some(Err(Error::from(e))),
            };

            let file_type = dir_entry.file_type();
            if file_type.is_dir() {
                // Directories are only traversed, never reported. Prune the
                // subtree as early as possible when the directory is excluded.
                if self.is_excluded(dir_entry.path()) {
                    self.inner.skip_current_dir();
                }
                continue;
            }
            // NOTE(review): only directories and symlinks are filtered by
            // type; any other non-directory entry type falls through here.
            if file_type.is_symlink() || self.is_excluded(dir_entry.path()) {
                continue;
            }

            // Surface metadata failures the same way traversal failures are
            // surfaced, instead of silently dropping the entry from the scan.
            let metadata = match dir_entry.metadata() {
                Ok(metadata) => metadata,
                Err(e) => return Some(Err(Error::from(e))),
            };

            let size_bytes = metadata.len();
            if let Some(cap) = self.scanner.config.max_file_size_bytes {
                if size_bytes > cap {
                    continue;
                }
            }

            let modified = metadata.modified().ok();
            let extension = dir_entry
                .path()
                .extension()
                .and_then(|os| os.to_str())
                .map(str::to_ascii_lowercase)
                .unwrap_or_default();

            return Some(Ok(ScanEntry {
                path: dir_entry.into_path(),
                size_bytes,
                modified,
                extension,
            }));
        }
    }
}
impl ScanWalkIter<'_> {
    /// Reports whether `path` matches any glob in the owning scanner's
    /// exclude set.
    fn is_excluded(&self, path: &Path) -> bool {
        let exclude_set = &self.scanner.excludes;
        exclude_set.is_match(path)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Builds a temporary tree with five files:
    /// `a.txt`, `b.log`, `.git/HEAD`, `nested/c.md`, `nested/deep/d.csv`.
    fn make_tree() -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        fs::write(root.join("a.txt"), "hello").unwrap();
        fs::write(root.join("b.log"), "noise").unwrap();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join(".git/HEAD"), "ref: refs/heads/main").unwrap();
        fs::create_dir_all(root.join("nested/deep")).unwrap();
        fs::write(root.join("nested/c.md"), "# title").unwrap();
        fs::write(root.join("nested/deep/d.csv"), "a,b\n1,2").unwrap();
        dir
    }

    #[test]
    fn walks_all_files_by_default() {
        let dir = make_tree();
        let scanner = Scanner::new(ScanConfig::default()).unwrap();
        let entries: Vec<_> = scanner.walk(dir.path()).filter_map(Result::ok).collect();
        assert_eq!(entries.len(), 5, "got {entries:?}");
    }

    #[test]
    fn excludes_match_files() {
        let dir = make_tree();
        let scanner = Scanner::new(ScanConfig::default().add_exclude("**/*.log").unwrap()).unwrap();
        let extensions: Vec<_> = scanner
            .walk(dir.path())
            .filter_map(Result::ok)
            .map(|e| e.extension)
            .collect();
        assert!(!extensions.contains(&"log".to_string()));
        assert_eq!(extensions.len(), 4);
    }

    #[test]
    fn excludes_match_directories_and_skip_subtree() {
        let dir = make_tree();
        let scanner =
            Scanner::new(ScanConfig::default().add_exclude("**/.git/**").unwrap()).unwrap();
        let paths: Vec<_> = scanner
            .walk(dir.path())
            .filter_map(Result::ok)
            .map(|e| e.path)
            .collect();
        // Compare path components rather than searching for "/.git/" in the
        // string form, which can never match when the separator is `\`.
        let git = std::ffi::OsStr::new(".git");
        assert!(
            !paths
                .iter()
                .any(|p| p.components().any(|c| c.as_os_str() == git)),
            "got {paths:?}"
        );
        // Guard against a vacuous pass: the four files outside `.git` must
        // still be reported.
        assert_eq!(paths.len(), 4, "got {paths:?}");
    }

    #[test]
    fn size_cap_filters_oversized_files() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("small.txt"), "tiny").unwrap();
        fs::write(dir.path().join("big.txt"), vec![0u8; 200]).unwrap();
        let scanner = Scanner::new(ScanConfig::default().max_file_size_bytes(100)).unwrap();
        let entries: Vec<_> = scanner.walk(dir.path()).filter_map(Result::ok).collect();
        assert_eq!(entries.len(), 1);
        assert!(entries[0].path.ends_with("small.txt"));
    }

    #[test]
    fn extension_is_lowercased_and_dotless() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("upper.PDF"), b"").unwrap();
        let scanner = Scanner::new(ScanConfig::default()).unwrap();
        let entries: Vec<_> = scanner.walk(dir.path()).filter_map(Result::ok).collect();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].extension, "pdf");
    }

    #[test]
    fn extensionless_files_get_empty_string() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("README"), b"readme").unwrap();
        let scanner = Scanner::new(ScanConfig::default()).unwrap();
        let entries: Vec<_> = scanner.walk(dir.path()).filter_map(Result::ok).collect();
        // Check the length first so a regression fails with a clear message
        // instead of an index-out-of-bounds panic.
        assert_eq!(entries.len(), 1, "got {entries:?}");
        assert_eq!(entries[0].extension, "");
    }

    #[test]
    fn missing_root_returns_error_via_iter() {
        let scanner = Scanner::new(ScanConfig::default()).unwrap();
        let mut iter = scanner.walk("/this/path/cannot/exist/9f3a2b1c");
        let first = iter.next();
        assert!(matches!(first, Some(Err(_))));
    }
}