use dactyl::NoHash;
use std::{
collections::HashSet,
ffi::{
OsStr,
OsString,
},
fs::DirEntry,
path::{
Path,
PathBuf,
},
};
/// Shared `ahash` state with hard-coded seeds.
///
/// It is used to reduce paths (and the `b"-"` STDIN marker) to the `u64`
/// keys stored in `Dowser::seen`. Because the seeds are constants rather than
/// randomly generated, every use of this state starts from the same seeds.
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
0x8596_cc44_bef0_1aa0,
0x98d4_0948_da60_19ae,
0x49f1_3013_c503_a6aa,
0xc4d7_82ff_3c9f_7bef,
);
/// Recursive file finder.
///
/// Paths are queued with `push_path`/`with_path` (or one of the `From`
/// impls), after which the struct is consumed as an [`Iterator`] that yields
/// the file paths it discovers, reading queued directories as it goes.
#[derive(Debug, Clone)]
pub struct Dowser {
/// Files that have been found but not yet returned by the iterator.
files: Vec<PathBuf>,
/// Directories that have been found but not yet read.
dirs: Vec<PathBuf>,
/// Hashes of every entry already recorded (or explicitly excluded), used
/// to avoid queueing the same path twice. The values are already `u64`
/// hashes, hence `NoHash` (presumably a pass-through hasher — confirm
/// against the `dactyl` documentation).
seen: HashSet<u64, NoHash>,
/// Whether symlinks should be followed (`true` by default).
symlinks: bool,
}
impl Default for Dowser {
    /// Create an empty finder that follows symlinks.
    ///
    /// The queues start small (room for eight paths each); the seen-set is
    /// pre-sized for 4096 entries since every recorded path lands in it.
    #[inline]
    fn default() -> Self {
        let seen = HashSet::with_capacity_and_hasher(4096, NoHash::default());
        let files = Vec::with_capacity(8);
        let dirs = Vec::with_capacity(8);
        Self { files, dirs, seen, symlinks: true }
    }
}
macro_rules! from {
($($ty:ty),+ $(,)?) => ($(
impl From<$ty> for Dowser {
#[inline]
fn from(src: $ty) -> Self { Self::default().with_path(src) }
}
impl From<&[$ty]> for Dowser {
#[inline]
fn from(src: &[$ty]) -> Self {
src.iter().fold(Dowser::default(), Dowser::with_path)
}
}
impl From<Vec<$ty>> for Dowser {
#[inline]
fn from(src: Vec<$ty>) -> Self { Self::from(src.as_slice()) }
}
)+);
}
from!{
&OsStr,
&OsString, OsString,
&Path,
&PathBuf, PathBuf,
&str,
&String, String,
}
impl Iterator for Dowser {
    type Item = PathBuf;

    /// Return the next file path, reading queued directories as needed.
    ///
    /// Directories that cannot be read, and directory entries that error or
    /// cannot be classified, are silently skipped. `None` is returned once
    /// both the file and directory queues are empty.
    fn next(&mut self) -> Option<Self::Item> {
        let follow = self.symlinks;

        // Keep draining directories until at least one file is queued.
        while self.files.is_empty() {
            let dir = self.dirs.pop()?;
            let Ok(listing) = std::fs::read_dir(dir) else { continue; };

            let entries = listing
                .filter_map(Result::ok)
                .filter_map(|de| Entry::from_dir_entry(&de, follow));
            for entry in entries {
                self.record_entry(entry);
            }
        }

        self.files.pop()
    }

    /// At least the already-queued files will be returned; the upper bound
    /// is unknown because pending directories may contain any number more.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let queued = self.files.len();
        (queued, None)
    }
}
impl Dowser {
    /// Queue a single file or directory path.
    ///
    /// The path is resolved using the current symlink setting; paths that
    /// cannot be resolved, or that were already seen, are ignored.
    pub fn push_path<P>(&mut self, path: P)
    where P: AsRef<Path> {
        let Some(entry) = Entry::from_path(path.as_ref(), self.symlinks) else { return; };
        self.record_entry(entry);
    }

    /// Queue every path listed in the text file `src`, one path per line.
    ///
    /// Lines are trimmed and blank lines skipped; each remaining line is
    /// queued exactly as `push_path` would queue it.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if `src` cannot be read into a
    /// string, in which case nothing is queued.
    pub fn push_paths_from_file<P: AsRef<Path>>(&mut self, src: P)
    -> Result<(), std::io::Error> {
        let contents = std::fs::read_to_string(src)?;
        contents.lines()
            .map(str::trim)
            .filter(|line| !line.is_empty())
            .for_each(|line| self.push_path(line));
        Ok(())
    }

    /// Queue every path piped in through STDIN, one path per line.
    ///
    /// STDIN is only consulted the first time this is called — a `b"-"`
    /// marker is added to the seen-set to remember that — and only when it
    /// is not a terminal. Reading stops at the first line that fails to
    /// decode.
    #[doc(hidden)]
    pub fn push_paths_from_stdin(&mut self) {
        use std::io::IsTerminal;

        // Already handled STDIN once; don't try again.
        if !self.seen.insert(AHASHER.hash_one(b"-")) { return; }

        let stdin = std::io::stdin();
        if stdin.is_terminal() { return; }

        for line in stdin.lines().map_while(Result::ok) {
            let line = line.trim();
            if !line.is_empty() { self.push_path(line); }
        }
    }

    /// Builder-style variant of `push_path`: queue `path` and return `self`.
    #[must_use]
    pub fn with_path<P>(mut self, path: P) -> Self
    where P: AsRef<Path> {
        self.push_path(path);
        self
    }
}
impl Dowser {
    /// Disable symlink following.
    ///
    /// The setting is read whenever a path is resolved, so it only affects
    /// paths queued or excluded *after* this call.
    #[must_use]
    #[inline]
    pub const fn without_symlinks(mut self) -> Self {
        self.symlinks = false;
        self
    }

    /// Exclude a file or directory from the results.
    ///
    /// The resolved path is marked as already seen, so it will never be
    /// queued afterwards. Only that exact path is marked: an excluded
    /// directory is never read, but its contents are not individually
    /// blocked. Unresolvable paths are ignored.
    #[must_use]
    #[inline]
    pub fn without_path<P>(mut self, path: P) -> Self
    where P: AsRef<Path> {
        let skipped = Entry::from_path(path.as_ref(), self.symlinks);
        if let Some(key) = skipped.as_ref().map(Entry::hash) {
            self.seen.insert(key);
        }
        self
    }
}
impl Dowser {
    /// Queue `e` for traversal/return unless its hash was already seen.
    ///
    /// Directories go to the directory queue, everything else to the file
    /// queue.
    #[inline]
    fn record_entry(&mut self, e: Entry) {
        // `insert` returns false for repeats; those are dropped.
        if !self.seen.insert(e.hash()) { return; }

        match e {
            Entry::Dir(dir) => self.dirs.push(dir),
            Entry::File(file) => self.files.push(file),
        }
    }
}
/// A resolved filesystem path, tagged by whether it needs to be traversed.
#[derive(Debug, Clone, Eq, PartialEq)]
enum Entry {
    /// A directory.
    Dir(PathBuf),
    /// Anything that resolved to something other than a directory.
    File(PathBuf),
}

impl Entry {
    /// Resolve an arbitrary path.
    ///
    /// When `follow` is false, a path that is itself a symlink (or whose
    /// metadata cannot be read) yields `None`. Otherwise the path is
    /// canonicalized and classified; `None` is returned if that fails.
    fn from_path(path: &Path, follow: bool) -> Option<Self> {
        // The link check has to happen before canonicalization erases it.
        if !follow && std::fs::symlink_metadata(path).ok()?.file_type().is_symlink() {
            return None;
        }
        Self::canonical(path)
    }

    /// Classify a directory entry.
    ///
    /// Plain directories and files keep the path reported by the entry.
    /// Anything else is canonicalized and classified when `follow` is true,
    /// and skipped when it is false.
    #[expect(clippy::filetype_is_file, reason = "We're testing all three possibilities.")]
    #[inline]
    fn from_dir_entry(e: &DirEntry, follow: bool) -> Option<Self> {
        let kind = e.file_type().ok()?;
        if kind.is_dir() { return Some(Self::Dir(e.path())); }
        if kind.is_file() { return Some(Self::File(e.path())); }
        if follow { Self::canonical(&e.path()) } else { None }
    }

    /// Canonicalize `path` and tag the result as a directory or a file.
    fn canonical(path: &Path) -> Option<Self> {
        let real = std::fs::canonicalize(path).ok()?;
        let meta = std::fs::symlink_metadata(&real).ok()?;
        Some(if meta.is_dir() { Self::Dir(real) } else { Self::File(real) })
    }
}
impl Entry {
    /// Hash the entry's path with the shared fixed-seed hasher.
    ///
    /// On Unix the raw path bytes are hashed directly.
    #[cfg(unix)]
    #[must_use]
    #[inline]
    pub(super) fn hash(&self) -> u64 {
        use std::os::unix::ffi::OsStrExt;

        let raw: &[u8] = self.path().as_os_str().as_bytes();
        AHASHER.hash_one(raw)
    }

    /// Hash the entry's path with the shared fixed-seed hasher.
    ///
    /// Off Unix there is no raw byte view, so the `Path` itself is hashed.
    #[cfg(not(unix))]
    #[must_use]
    #[inline]
    pub(super) fn hash(&self) -> u64 {
        let path: &Path = self.path();
        AHASHER.hash_one(path)
    }

    /// Borrow the inner path, whichever variant this is.
    #[inline]
    fn path(&self) -> &Path {
        let (Self::Dir(inner) | Self::File(inner)) = self;
        inner
    }
}
// Unit tests. These rely on fixture files under `tests/` (relative to the
// working directory the tests are run from); the expected counts below are
// tied to that fixture layout.
#[cfg(test)]
mod tests {
use super::*;
// NOTE(review): presumably referenced only so the dev-dependency is not
// reported as unused — confirm.
use brunch as _;
use std::collections::BTreeSet;
// Crawling `tests/` through the builder and through `From<&str>` must give
// the same set of files.
#[test]
fn t_new() {
// Build absolute paths for a few files that should (or should not) turn up.
let mut abs_dir = std::fs::canonicalize("tests/assets/").unwrap();
abs_dir.push("_.txt");
let abs_p1 = abs_dir.with_file_name("file.txt");
let abs_p2 = abs_dir.with_file_name("is-executable.sh");
let abs_p3 = std::fs::canonicalize("tests/extensions.txt").unwrap();
let abs_perr = abs_dir.with_file_name("foo.bar");
// Builder-style crawl.
let mut w1: Vec<PathBuf> = Dowser::default()
.with_path(PathBuf::from("tests/"))
.collect();
assert!(! w1.is_empty());
assert_eq!(w1.len(), 10);
assert!(w1.contains(&abs_p1));
assert!(w1.contains(&abs_p2));
assert!(w1.contains(&abs_p3));
assert!(! w1.contains(&abs_perr));
// The `From` route should find exactly the same files (order aside).
let mut w2: Vec<PathBuf> = Dowser::from("tests/").collect();
w1.sort();
w2.sort();
assert_eq!(w1, w2);
}
// Path resolution and symlink handling against the `tests/links` fixtures.
#[test]
fn t_resolve_path() {
let test_dir = std::fs::canonicalize("./tests/links")
.expect("Missing dowser link directory.");
// Sanity check: exactly one direct child of the links directory is a symlink.
let links = std::fs::read_dir(&test_dir)
.expect("Missing dowser link directory.")
.filter_map(Result::ok)
.filter_map(|e| e.file_type().ok())
.filter(std::fs::FileType::is_symlink)
.count();
assert_eq!(links, 1, "Wrong symlink count!");
// Every fixture path, as named on disk.
let raw = vec![
test_dir.join("01"),
test_dir.join("02"),
test_dir.join("03"),
test_dir.join("04"),
test_dir.join("05"), test_dir.join("06"), test_dir.join("07"), test_dir.join("06/08"),
test_dir.join("06/09"),
test_dir.join("06/10"), test_dir.join("06/11"), ];
// Canonicalize, sort, and dedup: anything unresolvable drops out, and paths
// that resolve to the same target collapse into one.
let mut canon = {
let mut tmp: Vec<PathBuf> = raw.iter()
.filter_map(|x| std::fs::canonicalize(x).ok())
.collect();
tmp.sort();
tmp.dedup();
tmp
};
assert_eq!(raw.len(), 11);
assert_eq!(canon.len(), 8, "{canon:?}");
// 07, 06/10, and 06/11 must not survive under their own names (presumably
// they are symlinks — confirm against the fixtures).
assert!(! canon.contains(&raw[6]));
assert!(! canon.contains(&raw[9]));
assert!(! canon.contains(&raw[10]));
// Resolving with symlink-following enabled must agree with canonicalize.
let trusting = {
let mut tmp: Vec<PathBuf> = raw.iter()
.filter_map(|p| Entry::from_path(p, true))
.map(|e| match e { Entry::Dir(p) | Entry::File(p) => p })
.collect();
tmp.sort();
tmp.dedup();
tmp
};
assert_eq!(trusting, canon);
// A full crawl yields only files, so compare against the file subset.
canon.retain(|p| p.is_file());
let mut itered: Vec<PathBuf> = Dowser::from(test_dir.as_path()).collect();
itered.sort();
assert_eq!(canon, itered);
// Crawl directory 06 with and without symlink-following: the latter must
// find fewer files, all of them direct children of 06.
let six_dir = std::fs::canonicalize(test_dir.join("06")).expect("Missing test dir 06");
let yay: Vec<_> = Dowser::default().with_path(&six_dir).collect();
let nay: Vec<_> = Dowser::default().without_symlinks().with_path(&six_dir).collect();
assert!(! nay.is_empty(), "BUG: Symlinks logic broke totals.");
assert!(nay.len() < yay.len(), "BUG: Symlinks were followed!");
assert!(
nay.iter().all(|p| p.parent().is_some_and(|p| p == six_dir)),
"Bug: Symlinks were followed!",
);
}
// Excluded paths must not be returned, and excluded directories must not be
// read.
#[test]
fn t_without() {
let root = std::fs::canonicalize("./tests/links").expect("Missing test directory.");
// Exclusions are registered before the parent directory is queued.
let found: BTreeSet<PathBuf> = Dowser::default()
.without_path("./tests/links/04")
.without_path("./tests/links/06")
.with_path("./tests/links")
.collect();
assert_eq!(
found.len(),
4,
"Unexpected number of files found!"
);
// NOTE(review): 06/08 is expected even though 06 is excluded, so it is
// presumably reached through a symlink elsewhere — confirm against fixtures.
for stub in ["01", "02", "03", "06/08"] {
assert!(
found.contains(&root.join(stub)),
"Missing {stub}.",
);
}
}
// Paths listed in a text file should be loaded and crawled.
#[test]
fn t_push_paths_from_file() {
use std::fs::File;
use std::io::Write;
// Without a usable temporary directory there is nothing to test.
let tmp = std::env::temp_dir();
if ! tmp.is_dir() { return; }
let asset_dir = std::fs::canonicalize("tests/assets")
.expect("Missing dowser assets dir");
let link01 = std::fs::canonicalize("tests/links/01")
.expect("Missing dowser links/01");
// Write a two-line list — one directory, one file — to a temporary file.
let text_file = tmp.join("dowser.test.txt");
let text = format!(
"{}\n{}\n",
asset_dir.as_os_str().to_str().expect("Asset dir cannot be represented as a string."),
link01.as_os_str().to_str().expect("Link01 cannot be represented as a string."),
);
let res = File::create(&text_file)
.and_then(|mut file|
file.write_all(text.as_bytes()).and_then(|()| file.flush())
);
// The assertions only run if the list was actually written.
if res.is_ok() && text_file.is_file() {
let mut crawl = Dowser::default();
crawl.push_paths_from_file(&text_file)
.expect("Loading text file failed.");
let found: BTreeSet<PathBuf> = crawl.collect();
// Clean up before asserting so a failure does not leave the file behind.
let _res = std::fs::remove_file(text_file);
assert!(found.len() == 4);
assert!(found.contains(&link01));
assert!(found.contains(&asset_dir.join("file.txt")));
assert!(found.contains(&asset_dir.join("functioning.JPEG")));
assert!(found.contains(&asset_dir.join("is-executable.sh")));
}
}
}