find_duplicate_files/
lib.rs

1mod args;
2mod enumerations;
3mod excel;
4mod structures;
5
6// https://crates.io/crates/cfg-if
7cfg_if::cfg_if! {
8    if #[cfg(feature = "walkdir")] {
9        mod with_walkdir;
10        pub use with_walkdir::get_all_files;
11    } else {
12        // default: use jwalk
13        mod with_jwalk;
14        pub use with_jwalk::get_all_files;
15    }
16}
17
18pub use self::{
19    args::Arguments,
20    enumerations::algo::{Algorithm, PathBufExtension},
21    structures::file_info::{FileExtension, FileInfo},
22    structures::group_info::{GroupExtension, GroupInfo},
23    structures::key_info::Key,
24    structures::path_info::PathInfo,
25    structures::total_info::TotalInfo,
26};
27pub use excel::write_xlsx;
28use serde::Serializer;
29use std::{
30    fs::{self, File},
31    path::{Path, PathBuf},
32    process::Command,
33    str,
34};
35
/// Boxed error trait object (`Send + Sync`) used as the error type of the fallible functions in this file.
36pub type MyError = Box<dyn std::error::Error + Send + Sync>;
/// `Result` alias whose error type is `MyError`.
37pub type MyResult<T> = Result<T, MyError>;
38
/// Stack size in bytes (64 MiB) that `set_env_variables` writes to `RUST_MIN_STACK`.
39const STACK_SIZE: usize = 64 * 1024 * 1024;
/// Separator character that `add_thousands_separator` passes to `split_and_insert`.
40const SEPARATOR: char = '.'; // thousands sep
/// File name `fdf.csv`; presumably the CSV output file - confirm against callers.
41pub const CSV_FILENAME: &str = "fdf.csv";
/// File name `fdf.xlsx`; presumably the Excel output file - confirm against callers.
42pub const XLSX_FILENAME: &str = "fdf.xlsx";
43
44/**
45If `thread '<unknown>' has overflowed its stack`, set the stack size to a new value.
46
47The default rustc stack size for main thread is currently `8 * 1024 * 1024`.
48
49Stack size can be changed during compile time.
50
51<https://github.com/rust-lang/rust/blob/master/compiler/rustc_interface/src/util.rs#L132>
52
53<https://github.com/rust-lang/rust/issues/111897>
54
55<https://doc.rust-lang.org/stable/std/thread/index.html#stack-size>
56*/
57pub fn set_env_variables() {
58    std::env::set_var("RUST_MIN_STACK", STACK_SIZE.to_string());
59}
60
61/// File is an object providing access to an open file on the filesystem.
62pub fn open_file<P>(path: &P) -> MyResult<File>
63where
64    P: AsRef<Path> + std::fmt::Debug,
65{
66    let file: File = match fs::OpenOptions::new()
67        .read(true)
68        .write(false) // This option, when false, will indicate that the file should not be writable if opened.
69        .create(false) // No files will be created
70        .open(path)
71    {
72        Ok(file) => file,
73        Err(error) => {
74            eprintln!("Failed to open file {path:?}");
75            eprintln!("Perhaps some temporary files no longer exist!");
76            eprintln!("Or lack of permission to read this file!");
77            panic!("{error}");
78        }
79    };
80
81    Ok(file)
82}
83
84/// Get path from arguments or from default (current directory).
85pub fn get_path(arguments: &Arguments) -> MyResult<PathBuf> {
86    let path: PathBuf = match &arguments.input_dir {
87        Some(path) => path.to_owned(),
88        None => PathBuf::from("."),
89    };
90
91    if arguments.full_path {
92        Ok(fs::canonicalize(path)?) // full path
93    } else {
94        Ok(path) // relative path
95    }
96}
97
98/// Print buffer to stdout
99pub fn my_print(buffer: &[u8]) -> MyResult<()> {
100    // Converts a slice of bytes to a string slice
101    let print_msg = match str::from_utf8(buffer) {
102        Ok(valid_uft8) => valid_uft8,
103        Err(error) => {
104            eprintln!("fn my_print()");
105            eprintln!("Invalid UTF-8 sequence!");
106            panic!("{error}");
107        }
108    };
109
110    // Print to stdout
111    print!("{print_msg}");
112    Ok(())
113}
114
// References:
// https://stackoverflow.com/questions/34837011/how-to-clear-the-terminal-screen-in-rust-after-a-new-line-is-printed
// https://stackoverflow.com/questions/65497187/cant-run-a-system-command-in-windows
// https://askubuntu.com/questions/25077/how-to-really-clear-the-terminal
// https://www.redswitches.com/blog/how-to-clear-terminal-screen-in-linux
// Remove unwanted characters
// clear | cat -v ; echo
// ^[[H^[[2J^[[3J
/// Clear (wipe) the terminal screen.
///
/// Runs `cmd /c cls` on Windows and `tput reset` on every other target, and
/// waits for the command to finish.
///
/// Clearing the screen is cosmetic, so a command that cannot be started
/// (for example `tput` is not installed) is reported on stderr instead of
/// aborting the program with a panic. The exit status of the command is
/// ignored.
pub fn clear_terminal_screen() {
    let (program, args): (&str, &[&str]) = if cfg!(target_os = "windows") {
        ("cmd", &["/c", "cls"][..])
    } else {
        ("tput", &["reset"][..]) // "clear" or "tput reset"
    };

    // `status()` spawns the child with inherited stdio and waits for it.
    if let Err(error) = Command::new(program).args(args).status() {
        eprintln!("Failed to clear the terminal screen with `{program}`: {error}");
    }
}
140
/// Format an integer with a separator between groups of three digits.
///
/// Values up to 999 are returned as plain digits. For larger values `insert`
/// is placed before every group of three digits counted from the right,
/// e.g. `1234567` with `'_'` becomes `"1_234_567"`.
pub fn split_and_insert(integer: usize, insert: char) -> String {
    const DIGITS_PER_GROUP: usize = 3;

    let digits = integer.to_string();
    if integer < 1000 {
        return digits;
    }

    let total = digits.len();
    let mut grouped = String::with_capacity(total + total / DIGITS_PER_GROUP);

    for (position, digit) in digits.chars().enumerate() {
        // A separator precedes each digit that starts a full group of three,
        // except for the very first digit of the number.
        let remaining = total - position;
        if position != 0 && remaining % DIGITS_PER_GROUP == 0 {
            grouped.push(insert);
        }
        grouped.push(digit);
    }

    grouped
}
166
167/// Serialize usize with fn split_and_insert().
168pub fn add_thousands_separator<S>(size: &usize, serializer: S) -> Result<S::Ok, S::Error>
169where
170    S: Serializer,
171{
172    serializer.collect_str(&format!("{} bytes", &split_and_insert(*size, SEPARATOR)))
173}
174
#[cfg(test)]
mod test_lib {
    use super::*;

    /// Checks `split_and_insert` with `'_'` against a table of inputs and expected strings.
    ///
    /// `cargo test -- --show-output split_integer_into_groups`
    #[test]
    fn split_integer_into_groups() {
        // (input, expected output)
        let cases: [(usize, &str); 11] = [
            (0, "0"),
            (1, "1"),
            (12, "12"),
            (999, "999"),
            (1000, "1_000"),
            (1001, "1_001"),
            (1234, "1_234"),
            (12345, "12_345"),
            (123456, "123_456"),
            (1234567, "1_234_567"),
            (12345678, "12_345_678"),
        ];

        let result: Vec<String> = cases
            .iter()
            .map(|&(integer, _)| {
                let integer_splitted: String = split_and_insert(integer, '_');
                println!("integer: {integer:<8} ; with thousands sep: {integer_splitted}");
                integer_splitted
            })
            .collect();

        let valid: Vec<&str> = cases.iter().map(|&(_, expected)| expected).collect();

        assert_eq!(valid, result);
    }
}