Skip to main content

clean_dev_dirs/utils/
size.rs

1//! Size parsing and manipulation utilities.
2//!
3//! This module provides functions for parsing human-readable size strings
4//! (like "100MB" or "1.5GiB") into byte values, and for measuring directory
5//! sizes on disk.
6
7use std::path::Path;
8
9use anyhow::Result;
10use walkdir::WalkDir;
11
12/// Calculate the total size of a directory and all its contents, in bytes.
13///
14/// Recursively traverses the directory tree using `walkdir` and sums the sizes
15/// of all files found. Errors for individual entries (permission denied, broken
16/// symlinks, etc.) are silently skipped so the function always returns a result.
17///
18/// Returns `0` if the path does not exist or cannot be traversed at the root level.
19#[must_use]
20pub fn calculate_dir_size(path: &Path) -> u64 {
21    let mut total = 0u64;
22
23    for entry in WalkDir::new(path).into_iter().flatten() {
24        if entry.file_type().is_file()
25            && let Ok(metadata) = entry.metadata()
26        {
27            total += metadata.len();
28        }
29    }
30
31    total
32}
33
34/// Parse a human-readable size string into bytes.
35///
36/// Supports both decimal (KB, MB, GB) and binary (KiB, MiB, GiB) units,
37/// as well as decimal numbers (e.g., "1.5GB").
38///
39/// # Arguments
40///
41/// * `size_str` - A string representing the size (e.g., "100MB", "1.5GiB", "1,000,000")
42///
43/// # Returns
44///
45/// - `Ok(u64)` - The size in bytes
46/// - `Err(anyhow::Error)` - If the string format is invalid or causes overflow
47///
48/// # Errors
49///
50/// This function will return an error if:
51/// - The size string format is invalid (e.g., "1.2.3MB", "invalid")
52/// - The number cannot be parsed as a valid integer or decimal
53/// - The resulting value would overflow `u64`
54/// - The decimal has too many fractional digits (more than 9)
55///
56/// # Examples
57///
58/// ```
59/// # use clean_dev_dirs::utils::parse_size;
60/// # use anyhow::Result;
61/// # fn main() -> Result<()> {
62/// assert_eq!(parse_size("100KB")?, 100_000);
63/// assert_eq!(parse_size("1.5MB")?, 1_500_000);
64/// assert_eq!(parse_size("1GiB")?, 1_073_741_824);
65/// # Ok(())
66/// # }
67/// ```
68///
69/// # Supported Units
70///
71/// - **Decimal**: KB (1000), MB (1000²), GB (1000³)
72/// - **Binary**: KiB (1024), MiB (1024²), GiB (1024³)
73/// - **Bytes**: Plain numbers without units
74pub fn parse_size(size_str: &str) -> Result<u64> {
75    if size_str == "0" {
76        return Ok(0);
77    }
78
79    let size_str = size_str.to_uppercase();
80    let (number_str, multiplier) = parse_size_unit(&size_str);
81
82    if number_str.contains('.') {
83        parse_decimal_size(number_str, multiplier)
84    } else {
85        parse_integer_size(number_str, multiplier)
86    }
87}
88
/// Split a size string into its numeric part and the multiplier of its unit.
///
/// The suffix table is upper-case and the comparison is case-sensitive, so the
/// caller is expected to pass an already upper-cased string.
///
/// Exactly one trailing unit suffix is removed. Whatever remains, including a
/// repeated suffix as in `"1MBMB"`, is returned as the numeric part so that the
/// numeric parser can reject it instead of it being silently discarded.
///
/// # Returns
///
/// `(numeric_part, multiplier)`. When no known suffix is present the input is
/// returned unchanged with a multiplier of `1` (plain bytes).
fn parse_size_unit(size_str: &str) -> (&str, u64) {
    const UNITS: &[(&str, u64)] = &[
        ("GIB", 1_073_741_824),
        ("MIB", 1_048_576),
        ("KIB", 1_024),
        ("GB", 1_000_000_000),
        ("MB", 1_000_000),
        ("KB", 1_000),
    ];

    UNITS
        .iter()
        .find_map(|&(suffix, multiplier)| {
            // `strip_suffix` removes the suffix once; `trim_end_matches` would
            // keep stripping and accept malformed input like "1MBMB".
            size_str
                .strip_suffix(suffix)
                .map(|number| (number, multiplier))
        })
        .unwrap_or((size_str, 1))
}
108
109/// Parse a decimal size value (e.g., "1.5").
110fn parse_decimal_size(number_str: &str, multiplier: u64) -> Result<u64> {
111    let parts: Vec<&str> = number_str.split('.').collect();
112    if parts.len() != 2 {
113        return Err(anyhow::anyhow!("Invalid decimal format: {number_str}"));
114    }
115
116    let integer_part: u64 = parts[0].parse().unwrap_or(0);
117    let fractional_result = parse_fractional_part(parts[1])?;
118
119    let integer_bytes = multiply_with_overflow_check(integer_part, multiplier)?;
120    let fractional_bytes =
121        multiply_with_overflow_check(fractional_result, multiplier)? / 1_000_000_000;
122
123    add_with_overflow_check(integer_bytes, fractional_bytes)
124}
125
126/// Parse the fractional part of a decimal number.
127fn parse_fractional_part(fractional_str: &str) -> Result<u64> {
128    let fractional_digits = fractional_str.len();
129    if fractional_digits > 9 {
130        return Err(anyhow::anyhow!("Too many decimal places: {fractional_str}"));
131    }
132
133    let fractional_part: u64 = fractional_str.parse()?;
134    let fractional_multiplier = 10u64.pow(9 - u32::try_from(fractional_digits)?);
135
136    Ok(fractional_part * fractional_multiplier)
137}
138
139/// Parse an integer size value.
140fn parse_integer_size(number_str: &str, multiplier: u64) -> Result<u64> {
141    let number: u64 = number_str.parse()?;
142    multiply_with_overflow_check(number, multiplier)
143}
144
145/// Multiply two values with overflow checking.
146fn multiply_with_overflow_check(a: u64, b: u64) -> Result<u64> {
147    a.checked_mul(b)
148        .ok_or_else(|| anyhow::anyhow!("Size value overflow: {a} * {b}"))
149}
150
151/// Add two values with overflow checking.
152fn add_with_overflow_check(a: u64, b: u64) -> Result<u64> {
153    a.checked_add(b)
154        .ok_or_else(|| anyhow::anyhow!("Final overflow: {a} + {b}"))
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each `(input, expected_bytes)` pair parses successfully.
    fn assert_parses(cases: &[(&str, u64)]) {
        for &(input, expected) in cases {
            assert_eq!(parse_size(input).unwrap(), expected, "input: {}", input);
        }
    }

    /// Assert that `parse_size` rejects every given input.
    fn assert_rejected(inputs: &[&str]) {
        for &input in inputs {
            assert!(parse_size(input).is_err(), "input: {}", input);
        }
    }

    #[test]
    fn test_parse_size_zero() {
        assert_parses(&[("0", 0)]);
    }

    #[test]
    fn test_parse_size_plain_bytes() {
        assert_parses(&[("1000", 1000), ("12345", 12345), ("1", 1)]);
    }

    #[test]
    fn test_parse_size_decimal_units() {
        assert_parses(&[
            ("1KB", 1_000),
            ("100KB", 100_000),
            ("1MB", 1_000_000),
            ("5MB", 5_000_000),
            ("1GB", 1_000_000_000),
            ("2GB", 2_000_000_000),
        ]);
    }

    #[test]
    fn test_parse_size_binary_units() {
        assert_parses(&[
            ("1KiB", 1_024),
            ("1MiB", 1_048_576),
            ("1GiB", 1_073_741_824),
            ("2KiB", 2_048),
            ("10MiB", 10_485_760),
        ]);
    }

    #[test]
    fn test_parse_size_case_insensitive() {
        assert_parses(&[
            ("1kb", 1_000),
            ("1Kb", 1_000),
            ("1kB", 1_000),
            ("1mb", 1_000_000),
            ("1mib", 1_048_576),
            ("1gib", 1_073_741_824),
        ]);
    }

    #[test]
    fn test_parse_size_decimal_values() {
        assert_parses(&[
            ("1.5KB", 1_500),
            ("2.5MB", 2_500_000),
            ("1.5MiB", 1_572_864), // 1.5 * 1048576
            ("0.5GB", 500_000_000),
            ("0.1KB", 100),
        ]);
    }

    #[test]
    fn test_parse_size_complex_decimals() {
        assert_parses(&[
            ("1.25MB", 1_250_000),
            ("3.14159KB", 3_141),       // Truncated due to precision
            ("2.75GiB", 2_952_790_016), // 2.75 * 1073741824
        ]);
    }

    #[test]
    fn test_parse_size_invalid_formats() {
        assert_rejected(&["", "invalid", "1.2.3MB", "MB1", "1XB", "-1MB"]);
    }

    #[test]
    fn test_parse_size_unit_order() {
        // Three-letter binary units must not be confused with the two-letter
        // decimal ones (GiB vs GB, MiB vs MB).
        assert_parses(&[
            ("1GiB", 1_073_741_824),
            ("1GB", 1_000_000_000),
            ("1MiB", 1_048_576),
            ("1MB", 1_000_000),
        ]);
    }

    #[test]
    fn test_parse_size_overflow() {
        // The largest plain byte count fits; scaling past u64::MAX must fail.
        let max_u64_str = u64::MAX.to_string();
        let too_large = format!("{}GB", u64::MAX / 1000 + 1);

        assert!(parse_size(&max_u64_str).is_ok());
        assert_rejected(&[too_large.as_str(), "999999999999999999999999GB"]);
    }

    #[test]
    fn test_parse_fractional_part() {
        let accepted: [(&str, u64); 4] = [
            ("5", 500_000_000),
            ("25", 250_000_000),
            ("125", 125_000_000),
            ("999999999", 999_999_999),
        ];
        for &(digits, billionths) in &accepted {
            assert_eq!(parse_fractional_part(digits).unwrap(), billionths);
        }

        // Too many decimal places
        assert!(parse_fractional_part("1234567890").is_err());
    }

    #[test]
    fn test_multiply_with_overflow_check() {
        let in_range: [(u64, u64, u64); 3] = [(100, 200, 20_000), (0, 999, 0), (1, 1, 1)];
        for &(a, b, product) in &in_range {
            assert_eq!(multiply_with_overflow_check(a, b).unwrap(), product);
        }

        // Test overflow
        let overflowing: [(u64, u64); 2] = [(u64::MAX, 2), (u64::MAX / 2 + 1, 2)];
        for &(a, b) in &overflowing {
            assert!(multiply_with_overflow_check(a, b).is_err());
        }
    }

    #[test]
    fn test_add_with_overflow_check() {
        let in_range: [(u64, u64, u64); 3] =
            [(100, 200, 300), (0, 999, 999), (u64::MAX - 1, 1, u64::MAX)];
        for &(a, b, sum) in &in_range {
            assert_eq!(add_with_overflow_check(a, b).unwrap(), sum);
        }

        // Test overflow
        let overflowing: [(u64, u64); 2] = [(u64::MAX, 1), (u64::MAX - 1, 2)];
        for &(a, b) in &overflowing {
            assert!(add_with_overflow_check(a, b).is_err());
        }
    }

    #[test]
    fn test_parse_size_unit() {
        let cases: [(&str, (&str, u64)); 5] = [
            ("100GB", ("100", 1_000_000_000)),
            ("50MIB", ("50", 1_048_576)),
            ("1024", ("1024", 1)),
            ("2.5KB", ("2.5", 1_000)),
            ("1.5GIB", ("1.5", 1_073_741_824)),
        ];
        for &(input, expected) in &cases {
            assert_eq!(parse_size_unit(input), expected);
        }
    }

    #[test]
    fn test_parse_decimal_size() {
        let accepted: [(&str, u64, u64); 3] = [
            ("1.5", 1_000_000, 1_500_000),
            ("2.25", 1_000, 2_250),
            ("0.5", 2_000_000_000, 1_000_000_000),
        ];
        for &(number, multiplier, bytes) in &accepted {
            assert_eq!(parse_decimal_size(number, multiplier).unwrap(), bytes);
        }

        // Invalid formats
        for &number in &["1.2.3", "invalid"] {
            assert!(parse_decimal_size(number, 1000).is_err());
        }
    }

    #[test]
    fn test_parse_integer_size() {
        let accepted: [(&str, u64, u64); 3] = [
            ("100", 1_000, 100_000),
            ("0", 999, 0),
            ("1", 1_000_000_000, 1_000_000_000),
        ];
        for &(number, multiplier, bytes) in &accepted {
            assert_eq!(parse_integer_size(number, multiplier).unwrap(), bytes);
        }

        // Invalid format
        assert!(parse_integer_size("not_a_number", 1000).is_err());
    }

    #[test]
    fn test_edge_cases() {
        // Very small decimal
        assert_parses(&[("0.001KB", 1)]);

        // Very large valid number
        let large_but_valid = format!("{}GB", u64::MAX / 1_000_000_000);
        assert!(parse_size(&large_but_valid).is_ok());

        // Zero with units
        assert_parses(&[("0KB", 0), ("0.0MB", 0)]);
    }
}