#![cfg_attr(docsrs, feature(doc_cfg))]
#![deny(unused_imports)]
use std::{
collections::{HashSet, VecDeque},
fs, io,
path::{Path, PathBuf},
};
#[cfg(feature = "regex")]
use regex::Regex;
/// Traversal settings and path filters applied by a [`Walker`].
///
/// All filters are optional. When no name, extension, suffix or regex filter
/// is configured every path matches; otherwise a path is kept as soon as it
/// satisfies any one of the configured filters.
#[derive(Debug, Default, Clone)]
pub struct WalkOptions {
/// When `true`, the entries of each directory are sorted before being yielded.
sort: bool,
/// Request directories in the output (combined with `files_only` by the walker).
dirs_only: bool,
/// Request regular files in the output (combined with `dirs_only` by the walker).
files_only: bool,
/// When `true`, symlinks whose canonical target has not been recorded yet are descended into.
follow_symlink: bool,
/// Deepest level the walker descends to; entries directly under the root are at depth 0.
max_depth: Option<u64>,
/// Extensions compared for equality with `Path::extension()`.
extensions: HashSet<String>,
/// Suffixes tested against the lossy string form of the whole path.
ends_with: Vec<String>,
/// Exact file names compared with `Path::file_name()`.
names: HashSet<String>,
/// Regular expressions tested against the UTF-8 file name.
#[cfg(feature = "regex")]
regex: Vec<Regex>,
}
impl WalkOptions {
    /// Creates options with no filter set, no sorting, no symlink following
    /// and no depth limit.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Requests directories in the output.
    ///
    /// If [`files`](Self::files) is requested as well, both directories and
    /// files are yielded.
    #[inline]
    pub fn dirs(&mut self) -> &mut Self {
        self.dirs_only = true;
        self
    }

    /// Requests regular files in the output.
    ///
    /// If [`dirs`](Self::dirs) is requested as well, both directories and
    /// files are yielded.
    #[inline]
    pub fn files(&mut self) -> &mut Self {
        self.files_only = true;
        self
    }

    /// Makes the walker descend into symlinked directories.
    #[inline]
    pub fn follow_symlink(&mut self) -> &mut Self {
        self.follow_symlink = true;
        self
    }

    /// Limits how deep the walker descends.
    ///
    /// Entries located directly under the walked root are at depth `0`, so
    /// `max_depth(0)` lists the root's entries without entering any of them.
    #[inline]
    pub fn max_depth(&mut self, depth: u64) -> &mut Self {
        self.max_depth = Some(depth);
        self
    }

    /// Keeps paths whose extension equals `ext`.
    ///
    /// The value is compared with [`Path::extension`], so it must be given
    /// without a leading dot (`"rs"`, not `".rs"`).
    #[inline]
    pub fn extension<S: AsRef<str>>(&mut self, ext: S) -> &mut Self {
        self.extensions.insert(ext.as_ref().to_string());
        self
    }

    /// Keeps paths whose textual (lossy UTF-8) form ends with `pat`.
    #[inline]
    pub fn ends_with<S: AsRef<str>>(&mut self, pat: S) -> &mut Self {
        self.ends_with.push(pat.as_ref().to_string());
        self
    }

    /// Keeps paths whose final component is exactly `name`.
    #[inline]
    pub fn name<S: AsRef<str>>(&mut self, name: S) -> &mut Self {
        self.names.insert(name.as_ref().to_string());
        self
    }

    /// Keeps paths whose final component matches the regular expression `regex`.
    ///
    /// # Errors
    ///
    /// Returns the [`regex::Error`] produced when `regex` fails to compile.
    #[inline]
    #[cfg(feature = "regex")]
    #[cfg_attr(docsrs, doc(cfg(feature = "regex")))]
    pub fn name_regex<S: AsRef<str>>(&mut self, regex: S) -> Result<&mut Self, regex::Error> {
        self.regex.push(Regex::new(regex.as_ref())?);
        Ok(self)
    }

    /// Tells whether no regex filter is configured (always `true` when the
    /// `regex` feature is disabled).
    #[inline]
    fn regex_is_empty(&self) -> bool {
        #[cfg(feature = "regex")]
        return self.regex.is_empty();
        #[cfg(not(feature = "regex"))]
        true
    }

    /// Returns `true` when `p` passes the configured filters.
    ///
    /// With no filter configured every path matches. Otherwise the path is
    /// accepted as soon as one filter (name, extension, suffix or regex)
    /// accepts it.
    #[inline]
    fn path_match<P: AsRef<Path>>(&self, p: P) -> bool {
        let p = p.as_ref();

        if self.extensions.is_empty()
            && self.ends_with.is_empty()
            && self.names.is_empty()
            && self.regex_is_empty()
        {
            return true;
        }

        // Extracted once: shared by the name filter and the regex filter.
        let file_name = p.file_name().and_then(|name| name.to_str());

        if !self.names.is_empty() && file_name.map_or(false, |name| self.names.contains(name)) {
            return true;
        }

        if !self.extensions.is_empty()
            && p.extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| self.extensions.contains(ext))
        {
            return true;
        }

        if !self.ends_with.is_empty() {
            // The lossy conversion does not depend on the suffix, so it is
            // computed once instead of once per pattern.
            let text = p.to_string_lossy();
            if self
                .ends_with
                .iter()
                .any(|suffix| text.ends_with(suffix.as_str()))
            {
                return true;
            }
        }

        #[cfg(feature = "regex")]
        if let Some(name) = file_name {
            if self.regex.iter().any(|re| re.is_match(name)) {
                return true;
            }
        }

        false
    }

    /// Enables or disables sorting of the entries of each visited directory.
    #[inline]
    pub fn sort(&mut self, value: bool) -> &mut Self {
        self.sort = value;
        self
    }

    /// Builds a [`Walker`] rooted at `p` that uses a copy of these options.
    #[inline]
    pub fn walk<P: AsRef<Path>>(&self, p: P) -> Walker {
        Walker::from_path(p).with_options(self.clone())
    }
}
/// Iterator adaptor that groups the items of a [`Walker`] into vectors.
pub struct Chunks {
/// Underlying walker the items are pulled from.
it: Walker,
/// Maximum number of items placed in each yielded vector.
capacity: usize,
}
impl Iterator for Chunks {
    type Item = Vec<Result<PathBuf, io::Error>>;

    /// Pulls up to `capacity` items from the walker and returns them as one
    /// vector, or `None` once no item could be collected.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let batch = (&mut self.it).take(self.capacity).collect::<Vec<_>>();
        (!batch.is_empty()).then_some(batch)
    }
}
/// Lazily lists the content of a single path.
///
/// For a regular file the iterator yields the file itself once; for anything
/// else it yields the entries returned by `fs::read_dir`, or the error raised
/// while opening the directory.
#[derive(Debug)]
struct PathIterator {
    /// Depth value supplied by the creator of this iterator.
    depth: u64,
    /// Path to list; taken out when it is yielded as a plain file.
    path: Option<PathBuf>,
    /// Entries loaded on the first call to `next` and not yielded yet.
    items: VecDeque<Result<PathBuf, io::Error>>,
    /// Whether the first call to `next` already happened.
    init: bool,
    /// Whether the directory entries must be sorted when loaded.
    sort: bool,
}

impl PathIterator {
    /// Creates an iterator over `path`; nothing is read from disk until the
    /// first call to `next`.
    fn new<P: AsRef<Path>>(depth: u64, path: P, sort: bool) -> Self {
        let path = Some(PathBuf::from(path.as_ref()));
        PathIterator {
            depth,
            path,
            items: VecDeque::new(),
            init: false,
            sort,
        }
    }

    /// Ordering used when sorting: successful entries by path, before any
    /// error; errors among themselves by their message.
    fn compare_entries(
        lhs: &Result<PathBuf, io::Error>,
        rhs: &Result<PathBuf, io::Error>,
    ) -> std::cmp::Ordering {
        match (lhs, rhs) {
            (Ok(a), Ok(b)) => a.cmp(b),
            (Ok(_), Err(_)) => std::cmp::Ordering::Less,
            (Err(_), Ok(_)) => std::cmp::Ordering::Greater,
            (Err(a), Err(b)) => a.to_string().cmp(&b.to_string()),
        }
    }
}

impl Iterator for PathIterator {
    type Item = Result<PathBuf, io::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        // Every call after the first one only drains the loaded entries.
        if self.init {
            return self.items.pop_front();
        }
        self.init = true;

        // `path` is always `Some` before the first call completes.
        let target = self.path.as_ref().unwrap();
        if target.is_file() {
            return self.path.take().map(Ok);
        }

        match fs::read_dir(target) {
            Ok(reader) => {
                let mut entries: Vec<Self::Item> = reader
                    .map(|entry| entry.map(|dir_entry| dir_entry.path()))
                    .collect();
                if self.sort {
                    entries.sort_by(Self::compare_entries);
                }
                self.items.extend(entries);
            }
            Err(err) => self.items.push_back(Err(err)),
        }

        self.items.pop_front()
    }
}
/// Iterator walking a directory tree and yielding the paths it encounters.
#[derive(Debug, Default)]
pub struct Walker {
/// Whether `initialize` has already run.
init: bool,
/// Path the walk starts from.
root: PathBuf,
/// Filters and traversal settings applied during the walk.
options: WalkOptions,
/// Suspended parent iterators, resumed in last-in first-out order.
queue: VecDeque<PathIterator>,
/// Iterator currently being drained, if any.
current: Option<PathIterator>,
/// BLAKE3 hashes of the canonical root and of the symlink targets already followed.
marked: HashSet<[u8; 32]>,
}
impl Walker {
#[inline(always)]
pub fn from_path<P: AsRef<Path>>(p: P) -> Self {
Self {
root: p.as_ref().to_path_buf(),
..Default::default()
}
}
#[inline(always)]
pub fn with_options(mut self, o: WalkOptions) -> Self {
self.options = o;
self
}
#[inline(always)]
pub fn chunks(self, size: usize) -> Chunks {
Chunks {
it: self,
capacity: size,
}
}
#[inline(always)]
fn initialize(&mut self) {
if let Ok(can) = self.root.canonicalize() {
let h = blake3::hash(can.to_string_lossy().as_bytes());
self.current = Some(PathIterator::new(0, &self.root, self.options.sort));
self.marked.insert(h.into());
}
self.init = true
}
#[inline(always)]
fn _next(&mut self) -> Option<Result<PathBuf, io::Error>> {
if !self.init {
self.initialize();
}
let Some(pi) = self.current.as_mut() else {
if self.queue.is_empty() {
return None;
} else {
self.current = self.queue.pop_back();
return self._next();
}
};
let depth = pi.depth;
let ni = pi.next();
match ni {
Some(Ok(p)) => {
if p.is_file() {
Some(Ok(p))
} else {
let next_depth = pi.depth + 1;
if let Some(max_depth) = self.options.max_depth {
if next_depth > max_depth {
return Some(Ok(p));
}
}
if let Ok(can) = p.canonicalize() {
let mut must_walk = false;
if p.is_symlink() && self.options.follow_symlink {
let h = blake3::hash(can.to_string_lossy().as_bytes());
if !self.marked.contains(h.as_bytes()) {
must_walk |= true;
self.marked.insert(h.into());
}
}
if must_walk || !p.is_symlink() {
let pi = self.current.take().unwrap();
self.queue.push_back(pi);
self.current = Some(PathIterator::new(depth + 1, &p, self.options.sort))
}
}
Some(Ok(p))
}
}
Some(Err(e)) => Some(Err(e)),
None => {
self.current = self.queue.pop_back();
self._next()
}
}
}
}
impl Iterator for Walker {
    type Item = Result<PathBuf, io::Error>;

    /// Yields the next entry accepted by the options; errors are passed
    /// through unfiltered.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let path = match self._next()? {
                Ok(path) => path,
                Err(err) => return Some(Err(err)),
            };

            let opts = &self.options;
            // A kind is wanted unless only the other kind was requested.
            let kind_wanted = (path.is_dir() && (!opts.files_only || opts.dirs_only))
                || (path.is_file() && (!opts.dirs_only || opts.files_only));

            if kind_wanted && opts.path_match(&path) {
                return Some(Ok(path));
            }
        }
    }
}
#[cfg(test)]
mod tests {
    //! Integration-style tests; most of them walk the current working directory.
    use std::ffi::OsStr;

    use super::*;

    #[test]
    fn test_walker() {
        let w = Walker::from_path("./");
        for e in w.flatten() {
            println!("{e:?}")
        }
    }

    #[test]
    fn test_walker_on_file() {
        let w = WalkOptions::new().walk("./src/lib.rs");
        let v = w.flatten().collect::<Vec<PathBuf>>();
        assert_eq!(v.len(), 1)
    }

    #[test]
    fn test_walker_only_files() {
        let mut o = WalkOptions::new();
        o.files();
        let w = o.walk("./");
        for p in w.flatten() {
            assert!(p.is_file())
        }
    }

    #[test]
    fn test_files_by_extension() {
        let mut o = WalkOptions::new();
        o.files().extension("o");
        let w = o.walk("./");
        let mut c = 0;
        for p in w.flatten() {
            assert_eq!(p.extension(), Some(OsStr::new("o")));
            c += 1;
        }
        assert!(c > 0);
    }

    #[test]
    fn test_files_ends_with() {
        let mut o = WalkOptions::new();
        o.ends_with(".o");
        let w = o.walk("./");
        let mut c = 0;
        for p in w.flatten() {
            assert_eq!(p.extension(), Some(OsStr::new("o")));
            c += 1;
        }
        assert!(c > 0);
    }

    #[test]
    fn test_dirs_ends_with() {
        let mut o = WalkOptions::new();
        o.ends_with("src").ends_with(".git");
        let v = o.walk("./").flatten().collect::<Vec<PathBuf>>();
        assert!(v.len() >= 2);
        for p in v.iter() {
            assert!(p.is_dir());
        }
    }

    #[test]
    fn test_files_by_chunks_and_extension() {
        let mut o = WalkOptions::new();
        o.files().extension("o");
        let w = o.walk("./");
        let mut c = 0;
        for chunk in w.chunks(1) {
            assert_eq!(chunk.len(), 1);
            for p in chunk.iter().flatten() {
                assert_eq!(p.extension(), Some(OsStr::new("o")));
                c += 1;
            }
        }
        assert!(c > 0);
    }

    #[test]
    fn test_walker_only_dirs() {
        let mut o = WalkOptions::new();
        o.dirs();
        let w = o.walk("./");
        for p in w.flatten() {
            assert!(p.is_dir());
        }
    }

    #[test]
    fn test_walker_dirs_and_files() {
        let mut o = WalkOptions::new();
        o.dirs().files();
        let w = o.walk("./");
        for p in w.flatten() {
            assert!(p.is_dir() || p.is_file());
        }
    }

    #[test]
    fn test_max_depth() {
        let d0 = WalkOptions::new().max_depth(0).walk("./").count();
        let d1 = WalkOptions::new().max_depth(1).walk("./").count();
        println!("d0={d0} d1={d1}");
        assert!(d1 > d0);
    }

    #[test]
    fn test_sort() {
        let w = WalkOptions::new().max_depth(0).sort(true).walk("./");
        let ns = WalkOptions::new().max_depth(0).sort(false).walk("./");
        let sorted = w.flatten().collect::<Vec<PathBuf>>();
        let mut unsorted = ns.flatten().collect::<Vec<PathBuf>>();
        assert!(sorted.len() > 1);
        // NOTE: this assumes the platform does not already list entries in sorted order.
        assert_ne!(sorted, unsorted);
        unsorted.sort();
        assert_eq!(sorted, unsorted);
    }

    #[test]
    fn test_name() {
        let w = WalkOptions::new().name("lib.rs").name("src").walk("./");
        let v = w.flatten().collect::<Vec<PathBuf>>();
        assert!(v.len() > 1);
        for p in v.iter() {
            if p.file_name().unwrap() == "lib.rs" {
                assert!(p.is_file())
            }
            if p.file_name().unwrap() == "src" {
                assert!(p.is_dir())
            }
        }
    }

    #[test]
    #[cfg(feature = "regex")]
    fn test_name_regex() {
        let mut w = WalkOptions::new();
        w.name_regex(r#"^(.*\.rs|src|target)$"#)
            .unwrap()
            .name_regex(r#".*\.md"#)
            .unwrap();
        assert!(w.clone().dirs().walk("./").count() > 0);
        assert!(w.clone().files().walk("./").count() > 0);
    }

    // `std::os::unix::fs::symlink` only exists on Unix targets, so the test is
    // gated to keep the test module compiling everywhere else.
    #[test]
    #[cfg(unix)]
    fn test_walker_follow_symlink() {
        use std::os::unix::fs::symlink;
        use tempfile::{tempdir, Builder};

        // Layout: <dir>/test_dir/test_file.txt, plus a second temporary
        // directory holding a link to <dir> and a link to itself (a loop).
        let dir = tempdir().unwrap();
        let test_dir_path = dir.path().join("test_dir");
        fs::create_dir(&test_dir_path).unwrap();
        let file_path = test_dir_path.join("test_file.txt");
        fs::File::create(&file_path).unwrap();
        let symlink_path = Builder::new().prefix("symlink_test").tempdir().unwrap();
        symlink(&dir, symlink_path.path().join("symlink")).unwrap();
        symlink(&symlink_path, symlink_path.path().join("loop")).unwrap();

        // Following symlinks reaches the file exactly once despite the loop.
        let paths = WalkOptions::new()
            .follow_symlink()
            .files()
            .walk(&symlink_path)
            .flatten()
            .collect::<Vec<PathBuf>>();
        assert_eq!(paths.len(), 1);
        assert!(paths[0].ends_with("test_file.txt"));

        // Without following symlinks no file is reachable.
        let paths = WalkOptions::new()
            .files()
            .walk(&symlink_path)
            .flatten()
            .collect::<Vec<PathBuf>>();
        assert!(paths.is_empty());

        // The links themselves are listed as directories but not entered.
        let paths = WalkOptions::new()
            .dirs()
            .walk(&symlink_path)
            .flatten()
            .collect::<Vec<PathBuf>>();
        assert!(paths.iter().any(|p| p.ends_with("loop")));
        assert!(paths.iter().any(|p| p.ends_with("symlink")));
        assert!(!paths.iter().any(|p| p == &test_dir_path));

        // With following enabled the directory behind the link is reported.
        let paths = WalkOptions::new()
            .dirs()
            .follow_symlink()
            .walk(&symlink_path)
            .flatten()
            .collect::<Vec<PathBuf>>();
        println!("{paths:#?}");
        println!("{test_dir_path:?}");
        assert!(paths.iter().any(|p| p.ends_with("loop")));
        assert!(paths.iter().any(|p| p.ends_with("symlink")));
        assert!(paths
            .iter()
            .any(|p| p.canonicalize().unwrap() == test_dir_path));
    }
}