use crate::{crossdev, get_size_or_panic, InodeFilter, WalkOptions};
use anyhow::Result;
use filesize::PathExt;
use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction};
use std::{
fs::Metadata,
io,
path::{Path, PathBuf},
time::{Duration, Instant},
};
pub type TreeIndex = NodeIndex;
pub type Tree = StableGraph<EntryData, (), Directed>;
#[derive(Eq, PartialEq, Debug, Default, Clone)]
pub struct EntryData {
pub name: PathBuf,
pub size: u128,
pub metadata_io_error: bool,
}
const REFRESH_RATE: Duration = Duration::from_millis(100);
#[derive(Default, Debug)]
pub struct Traversal {
pub tree: Tree,
pub root_index: TreeIndex,
pub entries_traversed: u64,
pub io_errors: u64,
pub total_bytes: Option<u128>,
}
impl Traversal {
pub fn from_walk(
mut walk_options: WalkOptions,
input: Vec<PathBuf>,
mut update: impl FnMut(&mut Traversal) -> Result<bool>,
) -> Result<Option<Traversal>> {
fn set_size_or_panic(tree: &mut Tree, node_idx: TreeIndex, current_size_at_depth: u128) {
tree.node_weight_mut(node_idx)
.expect("node for parent index we just retrieved")
.size = current_size_at_depth;
}
fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
tree.neighbors_directed(parent_node_idx, Direction::Incoming)
.next()
.expect("every node in the iteration has a parent")
}
fn pop_or_panic(v: &mut Vec<u128>) -> u128 {
v.pop().expect("sizes per level to be in sync with graph")
}
let mut t = {
let mut tree = Tree::new();
let root_index = tree.add_node(EntryData::default());
Traversal {
tree,
root_index,
..Default::default()
}
};
let (mut previous_node_idx, mut parent_node_idx) = (t.root_index, t.root_index);
let mut sizes_per_depth_level = Vec::new();
let mut current_size_at_depth: u128 = 0;
let mut previous_depth = 0;
let mut inodes = InodeFilter::default();
let mut last_checked = Instant::now();
const INITIAL_CHECK_INTERVAL: usize = 500;
let mut check_instant_every = INITIAL_CHECK_INTERVAL;
if walk_options.threads == 0 {
walk_options.threads = num_cpus::get();
}
#[cfg(not(windows))]
fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
name.size_on_disk_fast(meta)
}
#[cfg(windows)]
fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
parent.join(name).size_on_disk_fast(meta)
}
for path in input.into_iter() {
let mut last_seen_eid = 0;
let device_id = crossdev::init(path.as_ref())?;
for (eid, entry) in walk_options
.iter_from_path(path.as_ref())
.into_iter()
.enumerate()
{
t.entries_traversed += 1;
let mut data = EntryData::default();
match entry {
Ok(entry) => {
data.name = if entry.depth < 1 {
path.clone()
} else {
entry.file_name.into()
};
let file_size = match &entry.client_state {
Some(Ok(ref m))
if !m.is_dir()
&& (walk_options.count_hard_links || inodes.add(m))
&& (walk_options.cross_filesystems
|| crossdev::is_same_device(device_id, m)) =>
{
if walk_options.apparent_size {
m.len()
} else {
size_on_disk(&entry.parent_path, &data.name, m).unwrap_or_else(
|_| {
t.io_errors += 1;
data.metadata_io_error = true;
0
},
)
}
}
Some(Ok(_)) => 0,
Some(Err(_)) => {
t.io_errors += 1;
data.metadata_io_error = true;
0
}
None => 0, } as u128;
match (entry.depth, previous_depth) {
(n, p) if n > p => {
sizes_per_depth_level.push(current_size_at_depth);
current_size_at_depth = file_size;
parent_node_idx = previous_node_idx;
}
(n, p) if n < p => {
for _ in n..p {
set_size_or_panic(
&mut t.tree,
parent_node_idx,
current_size_at_depth,
);
current_size_at_depth +=
pop_or_panic(&mut sizes_per_depth_level);
parent_node_idx = parent_or_panic(&mut t.tree, parent_node_idx);
}
current_size_at_depth += file_size;
set_size_or_panic(
&mut t.tree,
parent_node_idx,
current_size_at_depth,
);
}
_ => {
current_size_at_depth += file_size;
}
};
data.size = file_size;
let entry_index = t.tree.add_node(data);
t.tree.add_edge(parent_node_idx, entry_index, ());
previous_node_idx = entry_index;
previous_depth = entry.depth;
}
Err(_) => {
if previous_depth == 0 {
data.name = path.clone();
let entry_index = t.tree.add_node(data);
t.tree.add_edge(parent_node_idx, entry_index, ());
}
t.io_errors += 1
}
}
if eid != 0
&& eid % check_instant_every == 0
&& last_checked.elapsed() >= REFRESH_RATE
{
let now = Instant::now();
let elapsed = (now - last_checked).as_millis() as f64;
check_instant_every = (INITIAL_CHECK_INTERVAL as f64
* ((eid - last_seen_eid) as f64 / INITIAL_CHECK_INTERVAL as f64)
* (REFRESH_RATE.as_millis() as f64 / elapsed))
.max(1.0) as usize;
last_seen_eid = eid;
last_checked = now;
if update(&mut t)? {
return Ok(None);
}
}
}
}
sizes_per_depth_level.push(current_size_at_depth);
current_size_at_depth = 0;
for _ in 0..previous_depth {
current_size_at_depth += pop_or_panic(&mut sizes_per_depth_level);
set_size_or_panic(&mut t.tree, parent_node_idx, current_size_at_depth);
parent_node_idx = parent_or_panic(&mut t.tree, parent_node_idx);
}
let root_size = t.recompute_root_size();
set_size_or_panic(&mut t.tree, t.root_index, root_size);
t.total_bytes = Some(root_size);
Ok(Some(t))
}
fn recompute_root_size(&self) -> u128 {
self.tree
.neighbors_directed(self.root_index, Direction::Outgoing)
.map(|idx| get_size_or_panic(&self.tree, idx))
.sum()
}
}