find_identical_files/
lib.rs

1mod args;
2mod enumerations;
3mod excel;
4mod structures;
5
6// https://crates.io/crates/cfg-if
7cfg_if::cfg_if! {
8    if #[cfg(feature = "walkdir")] {
9        mod with_walkdir;
10        pub use with_walkdir::get_all_files;
11    } else {
12        // default: use jwalk
13        mod with_jwalk;
14        pub use with_jwalk::get_all_files;
15    }
16}
17
18pub use self::{
19    args::Arguments,
20    enumerations::algo::{Algorithm, PathBufExtension},
21    structures::file_info::{FileExtension, FileInfo},
22    structures::group_info::{GroupExtension, GroupInfo},
23    structures::key_info::Key,
24    structures::path_info::PathInfo,
25    structures::total_info::TotalInfo,
26};
27pub use excel::write_xlsx;
28use serde::Serializer;
29use std::{
30    fs::{self, File},
31    io,
32    path::{Path, PathBuf},
33    process::Command,
34    str,
35};
36
37pub type MyError = Box<dyn std::error::Error + Send + Sync>;
38pub type MyResult<T> = Result<T, MyError>;
39
40const STACK_SIZE: usize = 64 * 1024 * 1024;
41const SEPARATOR: char = '.'; // thousands sep
42pub const CSV_FILENAME: &str = "fif.csv";
43pub const XLSX_FILENAME: &str = "fif.xlsx";
44
45/**
46If `thread '<unknown>' has overflowed its stack`, set the stack size to a new value.
47
48The default rustc stack size for main thread is currently `8 * 1024 * 1024`.
49
50Stack size can be changed during compile time.
51
52<https://github.com/rust-lang/rust/blob/master/compiler/rustc_interface/src/util.rs#L132>
53
54<https://github.com/rust-lang/rust/issues/111897>
55
56<https://doc.rust-lang.org/stable/std/thread/index.html#stack-size>
57*/
58pub fn set_env_variables() {
59    // TODO: Audit that the environment access only happens in single-threaded code.
60    unsafe { std::env::set_var("RUST_MIN_STACK", STACK_SIZE.to_string()) };
61}
62
63/// Opens a file in read-only mode.
64///
65/// Provides more informative error messages in case of failure.
66pub fn open_file<P>(path: &P) -> MyResult<File>
67where
68    P: AsRef<Path> + std::fmt::Debug,
69{
70    fs::OpenOptions::new()
71        .read(true)
72        .write(false) // This option, when false, will indicate that the file should not be writable if opened.
73        .create(false) // No files will be created
74        .open(path.as_ref())
75        .map_err(|error| match error.kind() {
76            io::ErrorKind::NotFound => {
77                format!("File not found: {path:?}\nPerhaps some temporary files no longer exist!")
78                    .into()
79            }
80            io::ErrorKind::PermissionDenied => {
81                format!("Permission denied when opening file: {path:?}").into()
82            }
83            _ => format!("Failed to open file {path:?}: {error}").into(),
84        })
85}
86
87/// Get path from arguments or from default (current directory).
88pub fn get_path(arguments: &Arguments) -> MyResult<PathBuf> {
89    let path: PathBuf = match &arguments.input_dir {
90        Some(path) => path.to_owned(),
91        None => PathBuf::from("."),
92    };
93
94    if arguments.extended_path {
95        Ok(fs::canonicalize(path)?) // full path
96    } else {
97        Ok(path) // relative path
98    }
99}
100
101/// Print buffer to stdout
102pub fn my_print(buffer: &[u8]) -> MyResult<()> {
103    // Converts a slice of bytes to a string slice
104    let print_msg = match str::from_utf8(buffer) {
105        Ok(valid_uft8) => valid_uft8,
106        Err(error) => {
107            eprintln!("fn my_print()");
108            eprintln!("Invalid UTF-8 sequence!");
109            panic!("{error}");
110        }
111    };
112
113    // Print to stdout
114    print!("{print_msg}");
115    Ok(())
116}
117
/// Clear (wipe) the terminal screen.
///
/// Runs `cmd /c cls` on Windows and `tput reset` elsewhere, and waits for the
/// command to finish so that the screen is cleared *before* any later output
/// is printed and no unreaped child process is left behind.
///
/// If the command cannot be started or exits unsuccessfully, the `ESC c`
/// escape sequence is printed instead as a best-effort fallback.
pub fn clear_terminal_screen() {
    let mut command = if cfg!(target_os = "windows") {
        let mut cls = Command::new("cmd");
        cls.args(["/c", "cls"]);
        cls
    } else {
        // "clear" or "tput reset"
        let mut reset = Command::new("tput");
        reset.arg("reset");
        reset
    };

    // `status()` blocks until the child exits; `spawn()` would return at once
    // and let the clear race with whatever is printed next.
    let cleared = matches!(command.status(), Ok(status) if status.success());

    // Alternative solution:
    if !cleared {
        print!("\x1bc");
    }
}
132
/// Formats `integer` in decimal with `insert` placed between groups of three
/// digits, counted from the right (e.g. `1234567` with `'_'` gives
/// `"1_234_567"`).
///
/// Numbers with at most three digits are returned without any separator.
pub fn split_and_insert(integer: usize, insert: char) -> String {
    const GROUP_SIZE: usize = 3;

    let digits = integer.to_string();
    let digit_count = digits.len();

    // Nothing to separate.
    if digit_count <= GROUP_SIZE {
        return digits;
    }

    let separators = (digit_count - 1) / GROUP_SIZE;
    let mut grouped = String::with_capacity(digit_count + separators * insert.len_utf8());

    for (position, digit) in digits.chars().enumerate() {
        // A separator goes in front of every digit that starts a full group,
        // except in front of the very first digit.
        let remaining = digit_count - position;
        if position > 0 && remaining % GROUP_SIZE == 0 {
            grouped.push(insert);
        }
        grouped.push(digit);
    }

    grouped
}
158
159/// Serialize usize with fn split_and_insert().
160pub fn add_thousands_separator<S>(size: &usize, serializer: S) -> Result<S::Ok, S::Error>
161where
162    S: Serializer,
163{
164    serializer.collect_str(&format!("{} bytes", &split_and_insert(*size, SEPARATOR)))
165}
166
#[cfg(test)]
mod test_lib {
    use super::*;

    /// Digits must be grouped in threes, counted from the right.
    ///
    /// `cargo test -- --show-output split_integer_into_groups`
    #[test]
    fn split_integer_into_groups() {
        // (input, expected text with '_' as thousands separator)
        let cases: [(usize, &str); 11] = [
            (0, "0"),
            (1, "1"),
            (12, "12"),
            (999, "999"),
            (1000, "1_000"),
            (1001, "1_001"),
            (1234, "1_234"),
            (12345, "12_345"),
            (123456, "123_456"),
            (1234567, "1_234_567"),
            (12345678, "12_345_678"),
        ];

        let result: Vec<String> = cases
            .iter()
            .map(|&(integer, _)| {
                let integer_splitted: String = split_and_insert(integer, '_');
                println!("integer: {integer:<8} ; with thousands sep: {integer_splitted}");
                integer_splitted
            })
            .collect();

        let valid: Vec<&str> = cases.iter().map(|&(_, expected)| expected).collect();

        assert_eq!(valid, result);
    }
}