#![cfg(feature = "walk")]
#![cfg_attr(docsrs, doc(cfg(feature = "walk")))]
mod behavior;
mod glob;
use std::fs::{FileType, Metadata};
use std::io;
use std::path::{Path, PathBuf};
use thiserror::Error;
use walkdir::{self, DirEntry, WalkDir};
use crate::filter::{
self, CancelWalk, HierarchicalIterator, Isomeric, SeparatingFilter, SeparatingFilterInput,
Separation, TreeResidue, WalkCancellation,
};
use crate::walk::glob::FilterAny;
use crate::{BuildError, Pattern};
pub use crate::walk::behavior::{
DepthBehavior, DepthMax, DepthMin, DepthMinMax, LinkBehavior, WalkBehavior,
};
pub use crate::walk::glob::GlobEntry;
type FileFiltrate<T> = Result<T, WalkError>;
type FileResidue<R> = TreeResidue<R>;
type FileFeed<T, R> = (FileFiltrate<T>, FileResidue<R>);
impl<T, R> Isomeric for (T, FileResidue<R>)
where
T: Entry,
R: Entry,
{
type Substituent<'a>
= &'a dyn Entry
where
Self: 'a;
fn substituent(separation: &Separation<Self>) -> Self::Substituent<'_> {
match separation {
Separation::Filtrate(filtrate) => filtrate.get(),
Separation::Residue(residue) => residue.get().get(),
}
}
}
trait SplitAtDepth {
fn split_at_depth(&self, depth: usize) -> (&Path, &Path);
}
impl SplitAtDepth for Path {
fn split_at_depth(&self, depth: usize) -> (&Path, &Path) {
let ancestor = self.ancestors().nth(depth).unwrap_or(Path::new(""));
let descendant = self.strip_prefix(ancestor).unwrap();
(ancestor, descendant)
}
}
trait JoinAndGetDepth {
fn join_and_get_depth(&self, path: impl AsRef<Path>) -> (PathBuf, usize);
}
impl JoinAndGetDepth for Path {
fn join_and_get_depth(&self, path: impl AsRef<Path>) -> (PathBuf, usize) {
let path = path.as_ref();
let joined = self.join(path);
let depth = joined.components().count();
let depth = if path.is_absolute() {
depth
.checked_add(1)
.expect("overflow determining join depth")
}
else {
depth.saturating_sub(self.components().count())
};
(joined, depth)
}
}
#[derive(Debug, Error)]
#[error("failed to match directory tree: {kind}")]
pub struct WalkError {
depth: usize,
kind: WalkErrorKind,
}
impl WalkError {
pub fn path(&self) -> Option<&Path> {
self.kind.path()
}
pub fn depth(&self) -> usize {
self.depth
}
}
impl From<walkdir::Error> for WalkError {
fn from(error: walkdir::Error) -> Self {
let depth = error.depth();
let path = error.path().map(From::from);
if error.io_error().is_some() {
WalkError {
depth,
kind: WalkErrorKind::Io {
path,
error: error.into_io_error().expect("incongruent error kind"),
},
}
}
else {
WalkError {
depth,
kind: WalkErrorKind::LinkCycle {
root: error
.loop_ancestor()
.expect("incongruent error kind")
.into(),
leaf: path.expect("incongruent error kind"),
},
}
}
}
}
impl From<WalkError> for io::Error {
fn from(error: WalkError) -> Self {
let kind = match &error.kind {
WalkErrorKind::Io { error, .. } => error.kind(),
_ => io::ErrorKind::Other,
};
io::Error::new(kind, error)
}
}
#[derive(Debug, Error)]
#[non_exhaustive]
enum WalkErrorKind {
#[error("failed to read file at `{path:?}`: {error}")]
Io {
path: Option<PathBuf>,
error: io::Error,
},
#[error("symbolic link cycle detected from `{root}` to `{leaf}`")]
LinkCycle { root: PathBuf, leaf: PathBuf },
}
impl WalkErrorKind {
pub fn path(&self) -> Option<&Path> {
match self {
WalkErrorKind::Io { path, .. } => path.as_ref().map(PathBuf::as_ref),
WalkErrorKind::LinkCycle { leaf, .. } => Some(leaf.as_ref()),
}
}
}
pub trait PathExt {
fn walk(&self) -> WalkTree {
self.walk_with_behavior(WalkBehavior::default())
}
fn walk_with_behavior(&self, behavior: impl Into<WalkBehavior>) -> WalkTree;
}
impl PathExt for Path {
fn walk_with_behavior(&self, behavior: impl Into<WalkBehavior>) -> WalkTree {
WalkTree::with_behavior(self, behavior)
}
}
pub trait Entry {
fn into_path(self) -> PathBuf
where
Self: Sized;
fn path(&self) -> &Path;
fn root_relative_paths(&self) -> (&Path, &Path);
fn metadata(&self) -> Result<Metadata, WalkError>;
fn file_type(&self) -> FileType;
fn depth(&self) -> usize;
}
#[derive(Clone, Debug)]
pub struct TreeEntry {
entry: DirEntry,
}
impl Entry for TreeEntry {
fn into_path(self) -> PathBuf {
self.entry.into_path()
}
fn path(&self) -> &Path {
self.entry.path()
}
fn root_relative_paths(&self) -> (&Path, &Path) {
self.path().split_at_depth(self.depth())
}
fn metadata(&self) -> Result<Metadata, WalkError> {
self.entry.metadata().map_err(From::from)
}
fn file_type(&self) -> FileType {
self.entry.file_type()
}
fn depth(&self) -> usize {
self.entry.depth()
}
}
#[derive(Debug)]
pub struct WalkTree {
is_dir: bool,
input: walkdir::IntoIter,
}
impl WalkTree {
fn with_behavior(root: impl Into<PathBuf>, behavior: impl Into<WalkBehavior>) -> Self {
WalkTree::with_pivot_and_behavior(root, 0, behavior)
}
fn with_pivot_and_behavior(
root: impl Into<PathBuf>,
pivot: usize,
behavior: impl Into<WalkBehavior>,
) -> Self {
let root = root.into();
let WalkBehavior { link, depth } = behavior.into();
let builder = WalkDir::new(root.as_path()).follow_links(match link {
LinkBehavior::ReadFile => false,
LinkBehavior::ReadTarget => true,
});
let builder = match depth {
DepthBehavior::Max(max) => builder.max_depth(max.max_at_pivot(pivot)),
DepthBehavior::Min(min) => builder.min_depth(min.min_at_pivot(pivot)),
DepthBehavior::MinMax(minmax) => {
let (min, max) = minmax.min_max_at_pivot(pivot);
builder.min_depth(min).max_depth(max)
},
DepthBehavior::Unbounded => builder,
};
WalkTree {
is_dir: false,
input: builder.into_iter(),
}
}
}
impl CancelWalk for WalkTree {
fn cancel_walk_tree(&mut self) {
if self.is_dir {
self.input.skip_current_dir();
}
}
}
impl Iterator for WalkTree {
type Item = Result<TreeEntry, WalkError>;
fn next(&mut self) -> Option<Self::Item> {
let (is_dir, next) = match self.input.next() {
Some(result) => match result {
Ok(entry) => (entry.file_type().is_dir(), Some(Ok(TreeEntry { entry }))),
Err(error) => (false, Some(Err(error.into()))),
},
_ => (false, None),
};
self.is_dir = is_dir;
next
}
}
impl SeparatingFilterInput for WalkTree {
type Feed = (Result<TreeEntry, WalkError>, TreeResidue<TreeEntry>);
}
pub trait FileIterator:
HierarchicalIterator<Feed = FileFeed<Self::Entry, Self::Residue>>
+ Iterator<Item = FileFiltrate<Self::Entry>>
{
type Entry: Entry;
type Residue: Entry + From<Self::Entry>;
fn filter_entry<F>(self, f: F) -> FilterEntry<Self, F>
where
Self: Sized,
F: FnMut(&dyn Entry) -> Option<EntryResidue>,
{
FilterEntry { input: self, f }
}
fn not<'t, T>(self, pattern: T) -> Result<Not<Self>, BuildError>
where
Self: Sized,
T: Pattern<'t>,
{
let tree = pattern.try_into().map_err(Into::into)?;
FilterAny::any(tree.into_alternatives()).map(|filter| Not {
input: self,
filter,
})
}
}
impl<T, R, I> FileIterator for I
where
T: Entry,
R: Entry + From<T>,
I: HierarchicalIterator<Feed = FileFeed<T, R>> + Iterator<Item = FileFiltrate<T>>,
{
type Entry = T;
type Residue = R;
}
#[derive(Clone, Debug)]
pub struct FilterEntry<I, F> {
input: I,
f: F,
}
impl<I, F> CancelWalk for FilterEntry<I, F>
where
I: CancelWalk,
{
fn cancel_walk_tree(&mut self) {
self.input.cancel_walk_tree()
}
}
impl<T, R, I, F> SeparatingFilter for FilterEntry<I, F>
where
T: 'static + Entry,
R: 'static + Entry + From<T>,
I: FileIterator<Entry = T, Residue = R>,
F: FnMut(&dyn Entry) -> Option<EntryResidue>,
{
type Feed = I::Feed;
fn feed(&mut self) -> Option<Separation<Self::Feed>> {
self.input
.feed()
.map(|separation| match separation.transpose_filtrate() {
Ok(separation) => separation
.filter_tree_by_substituent(
WalkCancellation::unchecked(&mut self.input),
|substituent| (self.f)(substituent).map(From::from),
)
.map_filtrate(Ok),
Err(error) => error.map(Err).into(),
})
}
}
impl<T, R, I, F> Iterator for FilterEntry<I, F>
where
T: 'static + Entry,
R: 'static + Entry + From<T>,
I: FileIterator<Entry = T, Residue = R>,
F: FnMut(&dyn Entry) -> Option<EntryResidue>,
{
type Item = I::Item;
fn next(&mut self) -> Option<Self::Item> {
filter::filtrate(self)
}
}
#[derive(Clone, Debug)]
pub struct Not<I> {
input: I,
filter: FilterAny,
}
impl<I> CancelWalk for Not<I>
where
I: CancelWalk,
{
fn cancel_walk_tree(&mut self) {
self.input.cancel_walk_tree()
}
}
impl<T, R, I> SeparatingFilter for Not<I>
where
T: 'static + Entry,
R: 'static + Entry + From<T>,
I: FileIterator<Entry = T, Residue = R>,
{
type Feed = I::Feed;
fn feed(&mut self) -> Option<Separation<Self::Feed>> {
self.input
.feed()
.map(|separation| match separation.transpose_filtrate() {
Ok(separation) => separation
.filter_tree_by_substituent(
WalkCancellation::unchecked(&mut self.input),
|substituent| self.filter.residue(substituent).map(From::from),
)
.map_filtrate(Ok),
Err(error) => error.map(Err).into(),
})
}
}
impl<T, R, I> Iterator for Not<I>
where
T: 'static + Entry,
R: 'static + Entry + From<T>,
I: FileIterator<Entry = T, Residue = R>,
{
type Item = I::Item;
fn next(&mut self) -> Option<Self::Item> {
filter::filtrate(self)
}
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum EntryResidue {
File,
Tree,
}
impl From<EntryResidue> for TreeResidue<()> {
fn from(residue: EntryResidue) -> Self {
match residue {
EntryResidue::File => TreeResidue::Node(()),
EntryResidue::Tree => TreeResidue::Tree(()),
}
}
}
#[cfg(test)]
pub mod harness {
use build_fs_tree::{Build, FileSystemTree};
use std::collections::HashSet;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use tempfile::{self, TempDir};
use crate::walk::{Entry, FileIterator};
macro_rules! assert_set_eq {
($left:expr, $right:expr $(,)?) => {{
match (&$left, &$right) {
(left, right) if !(*left == *right) => {
let lrdiff: Vec<_> = left.difference(right).collect();
let rldiff: Vec<_> = right.difference(left).collect();
panic!(
"assertion `left == right` failed\n\
left: {:#?}\n\
right: {:#?}\n\
left - right: {:#?}\n\
right - left: {:#?}",
left, right, lrdiff, rldiff,
)
},
_ => {},
}
}};
}
pub(crate) use assert_set_eq;
#[derive(Debug)]
pub struct TempTree {
_root: TempDir,
path: PathBuf,
}
impl TempTree {
pub fn join_all<'a, I>(&'a self, paths: I) -> impl 'a + Iterator<Item = PathBuf>
where
I: IntoIterator,
I::IntoIter: 'a,
I::Item: AsRef<Path>,
{
paths.into_iter().map(|path| self.join(path))
}
}
impl AsRef<Path> for TempTree {
fn as_ref(&self) -> &Path {
&self.path
}
}
impl Deref for TempTree {
type Target = Path;
fn deref(&self) -> &Self::Target {
&self.path
}
}
pub fn temptree<P, C>(path: impl AsRef<Path>, tree: FileSystemTree<P, C>) -> TempTree
where
P: AsRef<Path> + Ord,
C: AsRef<[u8]>,
{
let root = tempfile::tempdir().expect("failed to create temporary directory");
let path = root.path().join(path);
tree.build(&path)
.expect("failed to write tree in temporary directory");
TempTree { _root: root, path }
}
pub fn assert_walk_paths_eq<I>(walk: impl FileIterator, expected: I)
where
I: IntoIterator,
I::Item: Into<PathBuf>,
{
let paths: HashSet<_> = walk
.map(|entry| entry.expect("failed to read file"))
.map(Entry::into_path)
.collect();
assert_set_eq!(paths, expected.into_iter().map(Into::into).collect());
}
}
#[cfg(test)]
mod tests {
use build_fs_tree::{dir, file};
use rstest::{fixture, rstest};
use std::collections::HashSet;
use crate::Pattern;
use crate::filter::{HierarchicalIterator, Separation, TreeResidue};
use crate::walk::behavior::{DepthBehavior, DepthMax, DepthMin, DepthMinMax, LinkBehavior};
use crate::walk::harness::{self, TempTree, assert_set_eq};
use crate::walk::{Entry, FileIterator, PathExt};
const ALL: [&str; 11] = [
"",
"doc",
"doc/guide.md",
"src",
"src/glob.rs",
"src/lib.rs",
"tests",
"tests/harness",
"tests/harness/mod.rs",
"tests/walk.rs",
"README.md",
];
fn except<'t>(paths: impl IntoIterator<Item = &'t str>) -> impl Iterator<Item = &'t str> {
let paths: HashSet<_> = paths.into_iter().collect();
ALL.into_iter().filter(move |path| !paths.contains(path))
}
#[fixture]
fn temptree() -> TempTree {
harness::temptree::<&str, &str>(
"project",
dir! {
"doc" => dir! {
"guide.md" => file!(""),
},
"src" => dir! {
"glob.rs" => file!(""),
"lib.rs" => file!(""),
},
"tests" => dir! {
"harness" => dir! {
"mod.rs" => file!(""),
},
"walk.rs" => file!(""),
},
"README.md" => file!(""),
},
)
}
#[cfg(any(unix, windows))]
#[fixture]
fn temptree_with_cyclic_link() -> TempTree {
use std::io;
use std::path::Path;
#[cfg(unix)]
fn link(target: impl AsRef<Path>, link: impl AsRef<Path>) -> io::Result<()> {
std::os::unix::fs::symlink(target, link)
}
#[cfg(windows)]
fn link(target: impl AsRef<Path>, link: impl AsRef<Path>) -> io::Result<()> {
std::os::windows::fs::symlink_dir(target, link)
}
let temptree = temptree();
link(&temptree, temptree.join("tests/cycle"))
.expect("failed to write symbolic link in temporary tree");
temptree
}
#[rstest]
fn walk_path_includes_all_paths(temptree: TempTree) {
harness::assert_walk_paths_eq(temptree.walk(), temptree.join_all(ALL));
}
#[rstest]
#[case::subtree(
"tests/**",
[
"",
"doc",
"doc/guide.md",
"src",
"src/glob.rs",
"src/lib.rs",
"README.md",
],
)]
#[case::extension_from_root("*.md", except(["README.md"]))]
#[case::extension_from_any_tree("**/*.md", except(["doc/guide.md", "README.md"]))]
#[case::any(
crate::any(["**/*.rs", "tests/**"]),
["", "doc", "doc/guide.md", "src", "README.md"],
)]
#[case::empty("", except([""]))]
fn walk_path_with_not_excludes_only_matching_paths<'t>(
temptree: TempTree,
#[case] pattern: impl Pattern<'t>,
#[case] expected: impl IntoIterator<Item = &'t str>,
) {
harness::assert_walk_paths_eq(
temptree.walk().not(pattern).unwrap(),
temptree.join_all(expected),
);
}
#[rstest]
#[case::max(DepthMax(1), ["", "doc", "src", "tests", "README.md"])]
#[case::min(
DepthMin::from_min_or_unbounded(2),
[
"doc/guide.md",
"src/glob.rs",
"src/lib.rs",
"tests/harness",
"tests/harness/mod.rs",
"tests/walk.rs",
],
)]
#[case::minmax(
DepthMinMax::from_depths_or_max(2, 2),
["doc/guide.md", "src/glob.rs", "src/lib.rs", "tests/harness", "tests/walk.rs"],
)]
fn walk_path_with_depth_behavior_includes_only_paths_at_depth<'t>(
temptree: TempTree,
#[case] depth: impl Into<DepthBehavior>,
#[case] expected: impl IntoIterator<Item = &'t str>,
) {
harness::assert_walk_paths_eq(
temptree.walk_with_behavior(depth.into()),
temptree.join_all(expected),
);
}
#[rstest]
#[case::tree("**", ALL)]
#[case::bounded_terminating_component("**/*.md", ["doc/guide.md", "README.md"])]
#[case::invariant_intermediate_component("**/src/**/*.rs", ["src/glob.rs", "src/lib.rs"])]
#[case::invariant("src/lib.rs", ["src/lib.rs"])]
fn walk_glob_includes_only_matching_paths<'t>(
temptree: TempTree,
#[case] expression: &str,
#[case] expected: impl IntoIterator<Item = &'t str>,
) {
harness::assert_walk_paths_eq(
crate::harness::assert_new_glob_is_ok(expression).walk(temptree.as_ref()),
temptree.join_all(expected),
);
}
#[rstest]
fn walk_empty_partitioned_glob_at_non_empty_prefix_includes_only_prefix(temptree: TempTree) {
let (prefix, glob) =
crate::harness::assert_new_glob_is_ok("src/lib.rs").partition_or_empty();
harness::assert_walk_paths_eq(
glob.walk(temptree.join(prefix)),
[temptree.join("src/lib.rs")],
);
}
#[rstest]
fn walk_glob_with_exhaustive_not_cancels_walk(temptree: TempTree) {
#[derive(Debug, Eq, Hash, PartialEq)]
enum TestSeparation<T> {
Filtrate(T),
Residue(TreeResidue<T>),
}
use TestSeparation::{Filtrate, Residue};
use TreeResidue::{Node, Tree};
let glob = crate::harness::assert_new_glob_is_ok("**/*.{md,rs}");
let mut paths = HashSet::new();
glob.walk(temptree.as_ref())
.not("**/harness/**")
.unwrap()
.filter_map_tree(|_, separation| {
paths.insert(match separation.as_ref() {
Separation::Filtrate(filtrate) => Filtrate(
filtrate
.get()
.as_ref()
.expect("failed to read file")
.path()
.to_path_buf(),
),
Separation::Residue(residue) => Residue(
residue
.get()
.as_ref()
.map(|residue| residue.path().to_path_buf()),
),
});
separation
})
.for_each(drop);
assert_set_eq!(
paths,
[
Residue(Node(temptree.to_path_buf())),
Residue(Node(temptree.join("doc"))),
Filtrate(temptree.join("doc/guide.md")),
Residue(Node(temptree.join("src"))),
Filtrate(temptree.join("src/glob.rs")),
Filtrate(temptree.join("src/lib.rs")),
Residue(Node(temptree.join("tests"))),
Residue(Tree(temptree.join("tests/harness"))),
Filtrate(temptree.join("tests/walk.rs")),
Filtrate(temptree.join("README.md")),
]
.into_iter()
.collect(),
);
}
#[rstest]
#[case::non_zero_max(DepthMax(1), "**", ["", "doc", "src", "tests", "README.md"])]
#[case::zero_max(DepthMax(0), "**", [""])]
#[case::min(
DepthMin::from_min_or_unbounded(2),
"**",
[
"doc/guide.md",
"src/glob.rs",
"src/lib.rs",
"tests/harness",
"tests/harness/mod.rs",
"tests/walk.rs",
],
)]
#[case::prefixed_minmax(
DepthMinMax::from_depths_or_max(2, 2),
"tests/**",
["tests/harness", "tests/walk.rs"],
)]
fn walk_glob_with_depth_behavior_includes_only_paths_at_depth<'t>(
temptree: TempTree,
#[case] depth: impl Into<DepthBehavior>,
#[case] expression: &str,
#[case] expected: impl IntoIterator<Item = &'t str>,
) {
harness::assert_walk_paths_eq(
crate::harness::assert_new_glob_is_ok(expression)
.walk_with_behavior(temptree.as_ref(), depth.into()),
temptree.join_all(expected),
);
}
#[cfg(any(unix, windows))]
#[rstest]
fn walk_glob_with_read_link_file_behavior_includes_link_file(
#[from(temptree_with_cyclic_link)] temptree: TempTree,
) {
harness::assert_walk_paths_eq(
crate::harness::assert_new_glob_is_ok("**")
.walk_with_behavior(temptree.as_ref(), LinkBehavior::ReadFile),
temptree.join_all([
"",
"doc",
"doc/guide.md",
"src",
"src/glob.rs",
"src/lib.rs",
"tests",
"tests/cycle",
"tests/harness",
"tests/harness/mod.rs",
"tests/walk.rs",
"README.md",
]),
);
}
#[cfg(any(unix, windows))]
#[rstest]
fn walk_glob_with_read_link_target_behavior_excludes_cyclic_link_target(
#[from(temptree_with_cyclic_link)] temptree: TempTree,
) {
let expected = vec![
#[allow(clippy::redundant_clone)]
temptree.to_path_buf(),
temptree.join("README.md"),
temptree.join("doc"),
temptree.join("doc/guide.md"),
temptree.join("src"),
temptree.join("src/glob.rs"),
temptree.join("src/lib.rs"),
temptree.join("tests"),
temptree.join("tests/harness"),
temptree.join("tests/harness/mod.rs"),
temptree.join("tests/walk.rs"),
];
let glob = crate::harness::assert_new_glob_is_ok("**");
let mut paths: Vec<_> = glob
.walk_with_behavior(temptree.as_ref(), LinkBehavior::ReadTarget)
.flatten()
.take(expected.len() + 1)
.map(Entry::into_path)
.collect();
paths.sort_unstable();
assert_eq!(paths, expected);
}
}