Skip to main content

find_identical_files/
lib.rs

1mod args;
2mod enumerations;
3mod error;
4mod excel;
5mod separator;
6mod structures;
7mod traits;
8
9// https://crates.io/crates/cfg-if
10cfg_if::cfg_if! {
11    if #[cfg(feature = "walkdir")] {
12        mod with_walkdir;
13        pub use with_walkdir::get_all_files;
14    } else {
15        // default: use jwalk
16        mod with_jwalk;
17        pub use with_jwalk::get_all_files;
18    }
19}
20
21pub use self::{
22    args::Arguments,
23    enumerations::algo::{Algorithm, PathBufExtension, SliceExtension},
24    enumerations::procedures::*,
25    error::*,
26    separator::get_thousands_separator,
27    structures::file_info::{FileExtension, FileInfo},
28    structures::group_info::{GroupExtension, GroupInfo},
29    structures::key_info::Key,
30    structures::path_info::PathInfo,
31    structures::total_info::TotalInfo,
32};
33pub use excel::write_xlsx;
34use serde::Serializer;
35use std::{
36    fmt::{self, Write as FmtWrite}, // Rename to avoid conflict
37    fs::{self, File},
38    io::{self, Write as IoWrite}, // Rename to avoid conflict
39    path::{Path, PathBuf},
40    process::Command,
41    str,
42};
43
/// File name `"fif.csv"`; presumably the default CSV output file — confirm against callers.
pub const CSV_FILENAME: &str = "fif.csv";
/// File name `"fif.xlsx"`; presumably the default XLSX output file — confirm against callers.
pub const XLSX_FILENAME: &str = "fif.xlsx";
46
47/// Opens a file in read-only mode.
48///
49/// Provides more informative error messages in case of failure.
50pub fn open_file<P>(path: &P) -> FIFResult<File>
51where
52    P: AsRef<Path> + std::fmt::Debug,
53{
54    fs::OpenOptions::new()
55        .read(true)
56        .write(false) // This option, when false, will indicate that the file should not be writable if opened.
57        .create(false) // No files will be created
58        .open(path.as_ref())
59        .map_err(|error| {
60            let path_buf = path.as_ref().to_path_buf();
61            match error.kind() {
62                io::ErrorKind::NotFound => FIFError::FileNotFound { path: path_buf },
63                io::ErrorKind::PermissionDenied => FIFError::PermissionDenied { path: path_buf },
64                _ => FIFError::FileOpenError {
65                    path: path_buf,
66                    io_error: error,
67                },
68            }
69        })
70}
71
72/// Get path from arguments or from default (current directory).
73pub fn get_path(arguments: &Arguments) -> FIFResult<PathBuf> {
74    let path: PathBuf = match &arguments.input_dir {
75        Some(path) => path.to_owned(),
76        None => PathBuf::from("."),
77    };
78
79    if arguments.extended_path {
80        Ok(fs::canonicalize(path)?) // full path
81    } else {
82        Ok(path) // relative path
83    }
84}
85
86/// Prints the provided byte buffer to standard output as a UTF-8 string.
87///
88/// # Errors
89/// Returns `FIFError::Utf8Error` if the buffer contains invalid UTF-8 sequences.
90/// Returns `FIFError::Io` if writing to stdout fails.
91pub fn my_print(buffer: &[u8]) -> FIFResult<()> {
92    // Attempt to convert the raw byte slice into a valid UTF-8 string slice.
93    // The '?' operator will catch any Utf8Error and wrap it into FIFError.
94    let print_msg = str::from_utf8(buffer)?;
95
96    // Print to standard output.
97    print!("{print_msg}");
98
99    // Optional but recommended: flush stdout to ensure the output is displayed immediately.
100    io::stdout().flush()?;
101
102    Ok(())
103}
104
/// Clear (wipe) the terminal screen.
///
/// Runs `cmd /c cls` on Windows and `tput reset` elsewhere, and waits for the
/// command to finish so that the clear cannot land after output printed later
/// by the caller. If the command cannot be started or exits with a failure
/// status, the escape sequence `ESC c` is written to stdout instead.
///
/// This is a best-effort operation: failures of the fallback are ignored.
pub fn clear_terminal_screen() {
    // `status()` spawns the child, waits for it and reaps it.
    let status = if cfg!(target_os = "windows") {
        Command::new("cmd").args(["/c", "cls"]).status()
    } else {
        // "clear" or "tput reset"
        Command::new("tput").arg("reset").status()
    };

    let cleared = matches!(status, Ok(exit) if exit.success());

    // Alternative solution: ask the terminal itself to reset.
    if !cleared {
        let mut stdout = io::stdout().lock();
        // Errors are deliberately ignored: there is nothing else left to try.
        let _ = stdout.write_all(b"\x1bc").and_then(|()| stdout.flush());
    }
}
/// Split integer and insert thousands separator
///
/// Writes `integer` in decimal to `writer`, inserting `separator` between
/// every group of three digits counted from the right (`1234567` with `'_'`
/// becomes `1_234_567`).
///
/// This internal function is generic over any 'Writer' (String, Formatter, etc.).
/// The digits are produced arithmetically, group by group, so no intermediate
/// `String` is built.
///
/// # Errors
/// Returns `fmt::Error` if the underlying writer fails.
fn write_integer_with_separator<W: FmtWrite>(
    integer: usize,
    separator: char,
    writer: &mut W,
) -> fmt::Result {
    // Weight of the leading group: the largest power of 1000 not above `integer`.
    // The multiplication only runs while `divisor * 1000 <= integer`, so it cannot overflow.
    let mut divisor: usize = 1;
    while integer / divisor >= 1000 {
        divisor *= 1000;
    }

    // Leading group: one to three digits, never zero padded.
    write!(writer, "{}", integer / divisor)?;
    let mut remainder = integer % divisor;

    // Every following group is exactly three digits, zero padded.
    while divisor > 1 {
        divisor /= 1000;
        writer.write_char(separator)?;
        write!(writer, "{:03}", remainder / divisor)?;
        remainder %= divisor;
    }

    Ok(())
}
141
142/// High-performance string formatter returning a custom Result.
143///
144/// Performance wins:
145/// 1. Exact capacity allocation (Single Heap Allocation).
146/// 2. Automatic error propagation via `?` (Requires `From<fmt::Error>` in FIFError).
147pub fn split_and_insert(integer: usize, separator: char) -> FIFResult<String> {
148    let s_val = integer.to_string();
149    let len = s_val.len();
150
151    // Small optimization: skip processing if no separators are needed.
152    if len <= 3 {
153        return Ok(s_val);
154    }
155
156    // Step 1: Exact Capacity Calculation.
157    // Length of digits + (Number of separators * bytes per separator).
158    let num_seps = (len - 1) / 3;
159    let final_capacity = len + (num_seps * separator.len_utf8());
160
161    // Step 2: Allocate memory once.
162    let mut result = String::with_capacity(final_capacity);
163
164    // Step 3: Write digits directly into the allocated buffer.
165    // The '?' operator works here because FIFError implements From<std::fmt::Error>.
166    write_integer_with_separator(integer, separator, &mut result)?;
167
168    Ok(result)
169}
170
171/// Internal helper to format values directly into a stream.
172/// This avoids allocating a `String` inside the Serializer.
173struct BytesFormatter(usize);
174
175impl fmt::Display for BytesFormatter {
176    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177        let sep = get_thousands_separator();
178
179        // Write digits directly to the output stream (f).
180        write_integer_with_separator(self.0, sep, f)?;
181
182        // Append suffix directly to the output stream.
183        f.write_str(" bytes")
184    }
185}
186
187/// Serde Serializer: Formats usize as a string with separators (e.g., "1.234 bytes").
188/// Highly efficient: Writes directly to the serializer's buffer without
189/// temporary String allocations on the heap.
190pub fn add_thousands_separator<S>(size: &usize, serializer: S) -> Result<S::Ok, S::Error>
191where
192    S: Serializer,
193{
194    // Use collect_str to pipe our Display implementation into the Serializer.
195    serializer.collect_str(&BytesFormatter(*size))
196}
197
#[cfg(test)]
mod tests_lib {
    use super::*;

    /// Checks `split_and_insert` against a table of inputs and expected outputs.
    ///
    /// cargo test -- --show-output split_integer_into_groups
    #[test]
    fn split_integer_into_groups() -> FIFResult<()> {
        // (input, expected text with '_' as thousands separator)
        let cases: [(usize, &str); 11] = [
            (0, "0"),
            (1, "1"),
            (12, "12"),
            (999, "999"),
            (1000, "1_000"),
            (1001, "1_001"),
            (1234, "1_234"),
            (12345, "12_345"),
            (123456, "123_456"),
            (1234567, "1_234_567"),
            (12345678, "12_345_678"),
        ];

        let mut result: Vec<String> = Vec::with_capacity(cases.len());

        for (integer, _) in cases {
            let integer_splitted: String = split_and_insert(integer, '_')?;
            println!("integer: {integer:<8} ; with thousands sep: {integer_splitted}");
            result.push(integer_splitted);
        }

        let valid: Vec<&str> = cases.iter().map(|&(_, expected)| expected).collect();

        // Compare everything at once so a failure shows both complete lists.
        assert_eq!(valid, result);
        Ok(())
    }
}