use std::fs;
use crate::node::Node;
use crate::utils::is_filtered_out_due_to_invert_regex;
use crate::utils::is_filtered_out_due_to_regex;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use regex::Regex;
use std::path::PathBuf;
use std::sync::atomic;
use std::sync::atomic::AtomicBool;
use std::collections::HashSet;
use crate::node::build_node;
use std::fs::DirEntry;
use crate::platform::get_metadata;
pub struct WalkData<'a> {
pub ignore_directories: HashSet<PathBuf>,
pub filter_regex: &'a [Regex],
pub invert_filter_regex: &'a [Regex],
pub allowed_filesystems: HashSet<u64>,
pub use_apparent_size: bool,
pub by_filecount: bool,
pub ignore_hidden: bool,
}
pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: WalkData) -> (Vec<Node>, bool) {
let permissions_flag = AtomicBool::new(false);
let top_level_nodes: Vec<_> = dirs
.into_iter()
.filter_map(|d| {
let n = walk(d, &permissions_flag, &walk_data, 0);
match n {
Some(n) => {
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
clean_inodes(n, &mut inodes, walk_data.use_apparent_size)
}
None => None,
}
})
.collect();
(top_level_nodes, permissions_flag.into_inner())
}
fn clean_inodes(
x: Node,
inodes: &mut HashSet<(u64, u64)>,
use_apparent_size: bool,
) -> Option<Node> {
if !use_apparent_size {
if let Some(id) = x.inode_device {
if inodes.contains(&id) {
return None;
}
inodes.insert(id);
}
}
let new_children: Vec<_> = x
.children
.into_iter()
.filter_map(|c| clean_inodes(c, inodes, use_apparent_size))
.collect();
return Some(Node {
name: x.name,
size: x.size + new_children.iter().map(|c| c.size).sum::<u64>(),
children: new_children,
inode_device: x.inode_device,
depth: x.depth,
});
}
fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
let is_dot_file = entry.file_name().to_str().unwrap_or("").starts_with('.');
let is_ignored_path = walk_data.ignore_directories.contains(&entry.path());
if !walk_data.allowed_filesystems.is_empty() {
let size_inode_device = get_metadata(&entry.path(), false);
if let Some((_size, Some((_id, dev)))) = size_inode_device {
if !walk_data.allowed_filesystems.contains(&dev) {
return true;
}
}
}
if !walk_data.filter_regex.is_empty()
&& entry.path().is_file()
&& is_filtered_out_due_to_regex(walk_data.filter_regex, &entry.path())
{
return true;
}
if !walk_data.invert_filter_regex.is_empty()
&& entry.path().is_file()
&& is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &entry.path())
{
return true;
}
(is_dot_file && walk_data.ignore_hidden) || is_ignored_path
}
fn walk(
dir: PathBuf,
permissions_flag: &AtomicBool,
walk_data: &WalkData,
depth: usize,
) -> Option<Node> {
let mut children = vec![];
if let Ok(entries) = fs::read_dir(dir.clone()) {
children = entries
.into_iter()
.par_bridge()
.filter_map(|entry| {
if let Ok(ref entry) = entry {
if !ignore_file(entry, walk_data) {
if let Ok(data) = entry.file_type() {
if data.is_dir() && !data.is_symlink() {
return walk(entry.path(), permissions_flag, walk_data, depth + 1);
}
return build_node(
entry.path(),
vec![],
walk_data.filter_regex,
walk_data.invert_filter_regex,
walk_data.use_apparent_size,
data.is_symlink(),
data.is_file(),
walk_data.by_filecount,
depth,
);
}
}
} else {
permissions_flag.store(true, atomic::Ordering::Relaxed);
}
None
})
.collect();
} else {
permissions_flag.store(true, atomic::Ordering::Relaxed);
}
build_node(
dir,
children,
walk_data.filter_regex,
walk_data.invert_filter_regex,
walk_data.use_apparent_size,
false,
false,
walk_data.by_filecount,
depth,
)
}
mod tests {
#[allow(unused_imports)]
use super::*;
#[cfg(test)]
fn create_node() -> Node {
Node {
name: PathBuf::new(),
size: 10,
children: vec![],
inode_device: Some((5, 6)),
depth: 0,
}
}
#[test]
fn test_should_ignore_file() {
let mut inodes = HashSet::new();
let n = create_node();
assert!(clean_inodes(n.clone(), &mut inodes, false) == Some(n.clone()));
assert!(clean_inodes(n.clone(), &mut inodes, false) == None);
}
#[test]
fn test_should_not_ignore_files_if_using_apparent_size() {
let mut inodes = HashSet::new();
let n = create_node();
assert!(clean_inodes(n.clone(), &mut inodes, true) == Some(n.clone()));
assert!(clean_inodes(n.clone(), &mut inodes, true) == Some(n.clone()));
}
}