use build_fs_tree::{dir, file, Build, MergeableFileSystemTree};
use command_extra::CommandExtra;
use derive_more::{AsRef, Deref};
use into_sorted::IntoSorted;
use parallel_disk_usage::{
data_tree::{DataTree, DataTreeReflection},
fs_tree_builder::FsTreeBuilder,
get_size::{self, GetSize},
hardlink::HardlinkIgnorant,
os_string_display::OsStringDisplay,
reporter::ErrorOnlyReporter,
size,
};
use pipe_trait::Pipe;
use pretty_assertions::assert_eq;
use rand::{distr::Alphanumeric, rng, Rng};
use rayon::prelude::*;
use std::{
cmp::Ordering,
env::temp_dir,
fs::{create_dir, metadata, remove_dir_all, symlink_metadata},
io::Error,
path::{Path, PathBuf},
process::{Command, Output},
};
/// Default size getter on Unix targets: an instance of [`get_size::GetBlockSize`].
#[cfg(unix)]
pub const DEFAULT_GET_SIZE: get_size::GetBlockSize = get_size::GetBlockSize;
/// Default size getter on non-Unix targets: an instance of [`get_size::GetApparentSize`].
#[cfg(not(unix))]
pub const DEFAULT_GET_SIZE: get_size::GetApparentSize = get_size::GetApparentSize;
/// Path of a temporary directory.
///
/// The [`Drop`] implementation of this type removes the directory and its contents.
#[derive(Debug, AsRef, Deref)]
// `forward` asks derive_more to delegate `AsRef`/`Deref` to the impls of the inner `PathBuf`.
#[as_ref(forward)]
#[deref(forward)]
pub struct Temp(PathBuf);
impl Temp {
pub fn new_dir() -> Result<Self, Error> {
let path = rng()
.sample_iter(&Alphanumeric)
.take(15)
.map(char::from)
.collect::<String>()
.pipe(|name| temp_dir().join(name));
if path.exists() {
return Self::new_dir();
}
create_dir(&path)?;
path.pipe(Temp).pipe(Ok)
}
}
impl Drop for Temp {
fn drop(&mut self) {
let path = &self.0;
if let Err(error) = remove_dir_all(path) {
eprintln!("warning: Failed to delete {path:?}: {error}");
}
}
}
/// Temporary directory populated with a sample filesystem tree.
///
/// It wraps a [`Temp`], so the directory is removed when the workspace is dropped.
#[derive(Debug, AsRef, Deref)]
// `forward` asks derive_more to delegate `AsRef`/`Deref` to the impls of the inner `Temp`.
#[as_ref(forward)]
#[deref(forward)]
pub struct SampleWorkspace(Temp);
impl Default for SampleWorkspace {
/// Build the default sample workspace inside a fresh temporary directory.
///
/// Layout (every non-empty file consists of the letter `a` repeated):
///
/// * `flat/0` (empty), `flat/1` (100 000 bytes), `flat/2` (200 000 bytes), `flat/3` (300 000 bytes)
/// * `nested/0/1` (500 000 bytes)
/// * `empty-dir/` (no entries)
///
/// # Panics
///
/// Panics if the temporary directory cannot be created or the tree cannot be built.
fn default() -> Self {
let temp = Temp::new_dir().expect("create working directory for sample workspace");
MergeableFileSystemTree::<&str, String>::from(dir! {
// A directory that contains only files.
"flat" => dir! {
"0" => file!("")
"1" => file!("a".repeat(100_000))
"2" => file!("a".repeat(200_000))
"3" => file!("a".repeat(300_000))
}
// A single file two directory levels down.
"nested" => dir! {
"0" => dir! {
"1" => file!("a".repeat(500_000))
}
}
"empty-dir" => dir! {}
})
.build(&temp)
.expect("build the filesystem tree for the sample workspace");
SampleWorkspace(temp)
}
}
// Constructors for workspaces that contain hard links and symbolic links (Unix only).
#[cfg(unix)]
impl SampleWorkspace {
/// Build a small tree in which some files have hard links.
///
/// Five files are written under `main/sources`, each made of the letter `a`
/// repeated `sizes[i]` times (in the order they are listed below). Hard links
/// are then created under `main/internal-hardlinks` and `external-hardlinks`
/// as the source file names describe.
///
/// # Panics
///
/// Panics if the temporary directory, the tree, or any hard link cannot be created.
pub fn simple_tree_with_some_hardlinks(sizes: [usize; 5]) -> Self {
use std::fs::hard_link;
let temp = Temp::new_dir().expect("create working directory for sample workspace");
MergeableFileSystemTree::<&str, String>::from(dir! {
"main" => dir! {
"sources" => dir! {
"no-hardlinks.txt" => file!("a".repeat(sizes[0])),
"one-internal-hardlink.txt" => file!("a".repeat(sizes[1])),
"two-internal-hardlinks.txt" => file!("a".repeat(sizes[2])),
"one-external-hardlink.txt" => file!("a".repeat(sizes[3])),
"one-internal-one-external-hardlinks.txt" => file!("a".repeat(sizes[4])),
}
"internal-hardlinks" => dir! {}
}
"external-hardlinks" => dir! {}
})
.build(&temp)
.expect("build the filesystem tree for the sample workspace");
// `link!("a" -> "b")` hard-links `<temp>/a` to a new entry `<temp>/b`, panicking on failure.
macro_rules! link {
($original:literal -> $link:literal) => {{
let original = $original;
let link = $link;
if let Err(error) = hard_link(temp.join(original), temp.join(link)) {
panic!("Failed to link {original} to {link}: {error}");
}
}};
}
link!("main/sources/one-internal-hardlink.txt" -> "main/internal-hardlinks/link-0.txt");
link!("main/sources/two-internal-hardlinks.txt" -> "main/internal-hardlinks/link-1a.txt");
link!("main/sources/two-internal-hardlinks.txt" -> "main/internal-hardlinks/link-1b.txt");
link!("main/sources/one-external-hardlink.txt" -> "external-hardlinks/link-2.txt");
link!("main/sources/one-internal-one-external-hardlinks.txt" -> "main/internal-hardlinks/link-3a.txt");
link!("main/sources/one-internal-one-external-hardlinks.txt" -> "external-hardlinks/link-3b.txt");
SampleWorkspace(temp)
}
/// Build the tree of [`Self::simple_tree_with_some_hardlinks`] and add symbolic
/// links to it.
///
/// The symlink targets are relative (`.`, `..`, `./main`, `./main/sources`).
///
/// # Panics
///
/// Panics if the underlying tree or any symbolic link cannot be created.
pub fn simple_tree_with_some_symlinks_and_hardlinks(sizes: [usize; 5]) -> Self {
use std::os::unix::fs::symlink;
let workspace = SampleWorkspace::simple_tree_with_some_hardlinks(sizes);
// `symlink!("name" -> "target")` creates `<workspace>/name` pointing to the literal
// `target` string, panicking on failure.
macro_rules! symlink {
($link_name:literal -> $target:literal) => {
let link_name = $link_name;
let target = $target;
if let Err(error) = symlink(target, workspace.join(link_name)) {
panic!("Failed create symbolic link {link_name} pointing to {target}: {error}");
}
};
}
symlink!("workspace-itself" -> ".");
symlink!("main/main-itself" -> ".");
symlink!("main/parent-of-main" -> "..");
symlink!("main-mirror" -> "./main");
symlink!("sources-mirror" -> "./main/sources");
workspace
}
/// Build a workspace with one file, `file.txt`, made of the letter `a` repeated
/// `bytes` times, plus `links` hard links to it named `link.0`, `link.1`, ...
///
/// # Panics
///
/// Panics if the temporary directory, the file, or any hard link cannot be created.
pub fn multiple_hardlinks_to_a_single_file(bytes: usize, links: u64) -> Self {
use std::fs::{hard_link, write as write_file};
let temp = Temp::new_dir().expect("create working directory for sample workspace");
let file_path = temp.join("file.txt");
write_file(&file_path, "a".repeat(bytes)).expect("create file.txt");
for num in 0..links {
hard_link(&file_path, temp.join(format!("link.{num}")))
.unwrap_or_else(|error| panic!("Failed to create 'link.{num}': {error}"));
}
SampleWorkspace(temp)
}
/// Build a larger workspace that mixes files with and without hard links.
///
/// Every written file consists of the letter `a` repeated `bytes_per_file`
/// times. The directories are populated as follows:
///
/// * `no-hardlinks`: `files_per_branch` plain files.
/// * `some-hardlinks`: `files_per_branch` files; a file whose index is below
///   `files_per_branch / 4` gets one hard link, and one whose index is also
///   below `files_per_branch / 8` gets a second one, all in the same directory.
/// * `only-hardlinks/exclusive`: `files_per_branch` files, each with one hard
///   link in the same directory.
/// * `only-hardlinks/mixed`: files for the upper half of the indices, each with
///   one hard link in the same directory, plus files for the lower half that
///   are written while `only-hardlinks/external` is populated.
/// * `only-hardlinks/external`: one hard link per index, pointing into
///   `only-hardlinks/mixed` (lower half) or `some-hardlinks` (upper half).
///
/// # Panics
///
/// Panics if any directory, file, or hard link cannot be created.
pub fn complex_tree_with_shared_and_unique_files(
files_per_branch: usize,
bytes_per_file: usize,
) -> Self {
use std::fs::{create_dir_all, hard_link, write as write_file};
// Index thresholds used to split the files into groups.
let whole = files_per_branch;
let half = files_per_branch / 2;
let quarter = files_per_branch / 4;
let half_quarter = files_per_branch / 8;
let temp = Temp::new_dir().expect("create working directory for sample workspace");
// Create every directory up front so the parallel loops below only deal with files.
temp.join("no-hardlinks")
.pipe(create_dir_all)
.expect("create no-hardlinks");
temp.join("some-hardlinks")
.pipe(create_dir_all)
.expect("create some-hardlinks");
temp.join("only-hardlinks/exclusive")
.pipe(create_dir_all)
.expect("create only-hardlinks/exclusive");
temp.join("only-hardlinks/mixed")
.pipe(create_dir_all)
.expect("create only-hardlinks/mixed");
temp.join("only-hardlinks/external")
.pipe(create_dir_all)
.expect("create only-hardlinks/external");
// no-hardlinks: plain files only.
(0..files_per_branch).par_bridge().for_each(|index| {
let file_name = format!("file-{index}.txt");
let file_path = temp.join("no-hardlinks").join(file_name);
if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) {
panic!("Failed to write {bytes_per_file} bytes into {file_path:?}: {error}");
}
});
// some-hardlinks: every index gets a file; low indices also get one or two links.
(0..whole).par_bridge().for_each(|file_index| {
let file_name = format!("file-{file_index}.txt");
let file_path = temp.join("some-hardlinks").join(file_name);
if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) {
panic!("Failed to write {bytes_per_file} bytes into {file_path:?}: {error}");
}
// 2 links when index < half_quarter, 1 link when index < quarter, otherwise 0.
let link_count =
((file_index < quarter) as usize) + ((file_index < half_quarter) as usize);
for link_index in 0..link_count {
let link_name = format!("link{link_index}-file{file_index}.txt");
let link_path = temp.join("some-hardlinks").join(link_name);
if let Err(error) = hard_link(&file_path, &link_path) {
panic!("Failed to link {file_path:?} to {link_path:?}: {error}");
}
}
});
// only-hardlinks/exclusive: each file has exactly one link, located in the same directory.
(0..whole).par_bridge().for_each(|index| {
let file_name = format!("file-{index}.txt");
let file_path = temp.join("only-hardlinks/exclusive").join(file_name);
if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) {
panic!("Failed to write {bytes_per_file} bytes into {file_path:?}: {error}");
}
let link_name = format!("link-{index}.txt");
let link_path = temp.join("only-hardlinks/exclusive").join(link_name);
if let Err(error) = hard_link(&file_path, &link_path) {
panic!("Failed to link {file_path:?} to {link_path:?}: {error}");
}
});
// only-hardlinks/mixed, upper half of the indices: a file plus one link in the same directory.
(half..whole).par_bridge().for_each(|index| {
let file_name = format!("link0-{index}.txt");
let file_path = temp.join("only-hardlinks/mixed").join(file_name);
if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) {
panic!("Failed to write {bytes_per_file} bytes to {file_path:?}: {error}");
}
let link_name = format!("link1-{index}.txt");
let link_path = temp.join("only-hardlinks/mixed").join(link_name);
if let Err(error) = hard_link(&file_path, &link_path) {
panic!("Failed to link {file_path:?} to {link_path:?}: {error}");
}
});
// only-hardlinks/external: one link per index whose original lives in another directory.
(0..whole).par_bridge().for_each(|index| {
let link_name = format!("linkX-{index}.txt");
let link_path = temp.join("only-hardlinks/external").join(link_name);
let file_path = if index < half {
// Lower half: write a new file into only-hardlinks/mixed and link to that.
let file_name = format!("link0-{index}.txt"); let file_path = temp.join("only-hardlinks/mixed").join(file_name);
if let Err(error) = write_file(&file_path, "a".repeat(bytes_per_file)) {
panic!("Failed to write {bytes_per_file} bytes to {file_path:?}: {error}");
}
file_path
} else {
// Upper half: link to the file already written into some-hardlinks above.
let file_name = format!("file-{index}.txt"); temp.join("some-hardlinks").join(file_name)
};
if let Err(error) = hard_link(&file_path, &link_path) {
panic!("Failed to link {file_path:?} to {link_path:?}: {error}");
}
});
SampleWorkspace(temp)
}
}
/// Return `tree_reflection` with the children of every node ordered by name.
///
/// The children of a node are sorted first and then sanitized recursively
/// through rayon's parallel iterator; `name` and `size` are carried over as is.
pub fn sanitize_tree_reflection<Name, Size>(
    tree_reflection: DataTreeReflection<Name, Size>,
) -> DataTreeReflection<Name, Size>
where
    Name: Ord,
    Size: size::Size,
    DataTreeReflection<Name, Size>: Send,
{
    let ordered_children = tree_reflection
        .children
        .into_sorted_by(|a, b| a.name.cmp(&b.name));
    let sanitized_children: Vec<_> = ordered_children
        .into_par_iter()
        .map(|child| sanitize_tree_reflection(child))
        .collect();
    // `name` and `size` are moved over from the input untouched.
    DataTreeReflection {
        children: sanitized_children,
        ..tree_reflection
    }
}
/// Measure the `flat`, `nested`, and `empty-dir` subdirectories of `root` with
/// `size_getter` and assert that each resulting tree matches the expected
/// reflection, whose sizes are computed from filesystem metadata.
///
/// NOTE(review): the directory names match the layout built by
/// `SampleWorkspace::default()`; presumably `root` is such a workspace — confirm at call sites.
///
/// # Panics
///
/// Panics if metadata cannot be read, if the tree builder reports an error, or
/// if any of the comparisons fails.
pub fn test_sample_tree<Size, SizeGetter>(root: &Path, size_getter: SizeGetter)
where
Size: size::Size<Inner = u64> + From<u64> + Send + Sync,
SizeGetter: GetSize<Size = Size> + Copy + Sync,
{
// Size of the single entry at `root/suffix` according to `size_getter`.
let suffix_size = |suffix: &str| -> Size {
root.join(suffix)
.pipe(metadata)
.unwrap_or_else(|error| panic!("get_size {suffix}: {error}"))
.pipe(|ref metadata| size_getter.get_size(metadata))
};
// Sum of `suffix_size` over one or more suffixes.
macro_rules! suffix_size {
($suffix:expr $(,)?) => {
suffix_size($suffix)
};
($head:expr, $($tail:expr),* $(,)?) => {
suffix_size($head) + suffix_size!($($tail),*)
};
}
// Build the data tree rooted at `root/suffix`, sort it by name, and turn it into a reflection.
let measure = |suffix: &str| {
FsTreeBuilder {
size_getter,
hardlinks_recorder: &HardlinkIgnorant,
// No error is expected here, so any reported error fails the test.
reporter: &ErrorOnlyReporter::new(|error| {
panic!("Unexpected call to report_error: {error:?}")
}),
root: root.join(suffix),
max_depth: 10,
}
.pipe(DataTree::<OsStringDisplay, Size>::from)
.into_par_sorted(|left, right| left.name().cmp(right.name()))
.into_reflection()
};
// Expected name of a measured root: the full path `root/suffix`.
let sub = |suffix: &str| root.join(suffix).pipe(OsStringDisplay::os_string_from);
// `flat`: the total is the directory's own size plus the sizes of its four files.
assert_eq!(
measure("flat"),
sanitize_tree_reflection(DataTreeReflection {
name: sub("flat"),
size: suffix_size!("flat", "flat/0", "flat/1", "flat/2", "flat/3"),
children: vec![
DataTreeReflection {
name: OsStringDisplay::os_string_from("0"),
size: suffix_size("flat/0"),
children: Vec::new(),
},
DataTreeReflection {
name: OsStringDisplay::os_string_from("1"),
size: suffix_size("flat/1"),
children: Vec::new(),
},
DataTreeReflection {
name: OsStringDisplay::os_string_from("2"),
size: suffix_size("flat/2"),
children: Vec::new(),
},
DataTreeReflection {
name: OsStringDisplay::os_string_from("3"),
size: suffix_size("flat/3"),
children: Vec::new(),
},
]
}),
);
// `nested`: each directory's size includes everything below it.
assert_eq!(
measure("nested"),
sanitize_tree_reflection(DataTreeReflection {
name: sub("nested"),
size: suffix_size!("nested", "nested/0", "nested/0/1"),
children: vec![DataTreeReflection {
name: OsStringDisplay::os_string_from("0"),
size: suffix_size!("nested/0", "nested/0/1"),
children: vec![DataTreeReflection {
name: OsStringDisplay::os_string_from("1"),
size: suffix_size!("nested/0/1"),
children: Vec::new(),
}]
}],
}),
);
// `empty-dir`: only the directory's own size, with no children.
assert_eq!(
measure("empty-dir"),
sanitize_tree_reflection(DataTreeReflection {
name: sub("empty-dir"),
size: suffix_size!("empty-dir"),
children: Vec::new(),
}),
);
}
/// Value of the `CARGO_BIN_EXE_pdu` environment variable, read at compile time
/// by `env!` and used in this file as the program to spawn.
pub const PDU: &str = env!("CARGO_BIN_EXE_pdu");
/// Cheap, clonable description of a command line: only the list of arguments.
#[derive(Debug, Default, Clone)]
pub struct CommandRepresentation<'a> {
    /// Arguments, in the order they were appended.
    args: Vec<&'a str>,
}

impl<'a> CommandRepresentation<'a> {
    /// Append `arg` to the argument list and return the representation for chaining.
    pub fn arg(self, arg: &'a str) -> Self {
        let Self { mut args } = self;
        args.push(arg);
        Self { args }
    }
}
/// List of command representations.
///
/// The methods `flag_matrix` and `option_matrix` grow the list with variants of
/// the existing entries; `commands` turns the entries into `Command` values.
#[derive(Debug, Clone, AsRef, Deref)]
pub struct CommandList<'a>(Vec<CommandRepresentation<'a>>);
impl<'a> Default for CommandList<'a> {
fn default() -> Self {
CommandRepresentation::default()
.pipe(|x| vec![x])
.pipe(CommandList)
}
}
impl<'a> CommandList<'a> {
    /// Double the list: every existing command with the flag `name` appended,
    /// followed by every existing command unchanged.
    ///
    /// # Panics
    ///
    /// Panics if `name` does not look like a flag (see `assert_flag`).
    pub fn flag_matrix(self, name: &'a str) -> Self {
        Self::assert_flag(name);
        let CommandList(without_flag) = self;
        let mut combined = Vec::with_capacity(without_flag.len() * 2);
        combined.extend(without_flag.iter().cloned().map(|cmd| cmd.arg(name)));
        combined.extend(without_flag);
        CommandList(combined)
    }

    /// For each value, in order, add every existing command with `name value`
    /// appended; the existing commands follow unchanged at the end.
    ///
    /// # Panics
    ///
    /// Panics if `name` does not look like a flag (see `assert_flag`).
    pub fn option_matrix<const LEN: usize>(self, name: &'a str, values: [&'a str; LEN]) -> Self {
        Self::assert_flag(name);
        let CommandList(without_option) = self;
        let mut combined = Vec::with_capacity(without_option.len() * (LEN + 1));
        for value in values {
            for cmd in &without_option {
                combined.push(cmd.clone().arg(name).arg(value));
            }
        }
        combined.extend(without_option);
        CommandList(combined)
    }

    /// Iterate over the list as `Command` values that run `PDU` with the recorded arguments.
    pub fn commands(&'a self) -> impl Iterator<Item = Command> + 'a {
        self.0.iter().map(|representation| {
            let program = Command::new(PDU);
            program.with_args(&representation.args)
        })
    }

    /// Check that `name` looks like a flag: exactly two characters long and
    /// starting with `-`, or longer and starting with `--`.
    ///
    /// # Panics
    ///
    /// Panics if `name` is shorter than two bytes or lacks the required prefix.
    fn assert_flag(name: &str) {
        let required_prefix = match name.len() {
            0 | 1 => panic!("{name:?} is not a valid flag"),
            2 => "-",
            _ => "--",
        };
        assert!(
            name.starts_with(required_prefix),
            "{name:?} is not a valid flag"
        );
    }
}
pub fn stdout_text(
Output {
status,
stdout,
stderr,
}: Output,
) -> String {
inspect_stderr(&stderr);
assert!(
status.success(),
"progress exits with non-zero status: {status:?}",
);
stdout
.pipe(String::from_utf8)
.expect("parse stdout as UTF-8")
.trim_end()
.to_string()
}
/// Print the captured stderr bytes to this process's stderr.
///
/// The bytes are decoded lossily as UTF-8 and trimmed; nothing is printed when
/// the trimmed text is empty.
pub fn inspect_stderr(stderr: &[u8]) {
    let decoded = String::from_utf8_lossy(stderr);
    match decoded.trim() {
        "" => {}
        text => eprintln!("STDERR:\n{text}\n"),
    }
}
pub fn sort_reflection_by<Name, Size, Order>(
reflection: &mut DataTreeReflection<Name, Size>,
order: Order,
) where
Size: size::Size,
Order:
FnMut(&DataTreeReflection<Name, Size>, &DataTreeReflection<Name, Size>) -> Ordering + Copy,
{
reflection.children.sort_by(order);
for child in &mut reflection.children {
sort_reflection_by(child, order);
}
}
/// Return the length reported by `symlink_metadata` for `path`; a symbolic link
/// is therefore measured itself, not its target.
///
/// # Panics
///
/// Panics if the metadata cannot be read.
pub fn read_apparent_size(path: &Path) -> u64 {
    match symlink_metadata(path) {
        Ok(stats) => stats.len(),
        Err(error) => panic!("Can't read metadata at {path:?}: {error}"),
    }
}
/// Return the inode number reported by `symlink_metadata` for `path`; a symbolic
/// link is therefore inspected itself, not its target.
///
/// # Panics
///
/// Panics if the metadata cannot be read.
#[cfg(unix)]
pub fn read_inode_number(path: &Path) -> u64 {
    use std::os::unix::fs::MetadataExt;
    match symlink_metadata(path) {
        Ok(stats) => stats.ino(),
        Err(error) => panic!("Can't read metadata at {path:?}: {error}"),
    }
}