agpm_cli/utils/fs/
metadata.rs

1//! File metadata operations including size calculation, checksums, and file queries.
2//!
3//! This module provides functions for:
4//! - Directory size calculation (recursive)
5//! - SHA-256 checksum generation (single and parallel)
6//! - File existence and readability checks
7//! - File modification time queries and comparisons
8//!
9//! # Examples
10//!
11//! ```rust,no_run
12//! use agpm_cli::utils::fs::metadata::{calculate_checksum, dir_size, file_exists_and_readable};
13//! use std::path::Path;
14//!
15//! # fn example() -> anyhow::Result<()> {
16//! // Check if file is readable
17//! if file_exists_and_readable(Path::new("important.txt")) {
18//!     // Calculate checksum for integrity verification
19//!     let checksum = calculate_checksum(Path::new("important.txt"))?;
20//!     println!("File checksum: {}", checksum);
21//! }
22//!
23//! // Calculate directory size
24//! let size = dir_size(Path::new("cache"))?;
25//! println!("Cache size: {} bytes", size);
26//! # Ok(())
27//! # }
28//! ```
29
30use anyhow::{Context, Result};
31use futures::future::try_join_all;
32use sha2::{Digest, Sha256};
33use std::fs;
34use std::path::{Path, PathBuf};
35
36/// Calculates the total size of a directory and all its contents recursively.
37///
38/// This function traverses the directory tree and sums the sizes of all regular files.
39/// It handles nested directories and provides the total disk usage for the directory tree.
40///
41/// # Arguments
42///
43/// * `path` - The directory to calculate size for
44///
45/// # Returns
46///
47/// The total size in bytes, or an error if the directory cannot be read
48///
49/// # Examples
50///
51/// ```rust,no_run
52/// use agpm_cli::utils::fs::metadata::dir_size;
53/// use std::path::Path;
54///
55/// # fn example() -> anyhow::Result<()> {
56/// let cache_size = dir_size(Path::new("~/.agpm/cache"))?;
57/// println!("Cache size: {} bytes ({:.2} MB)", cache_size, cache_size as f64 / 1024.0 / 1024.0);
58/// # Ok(())
59/// # }
60/// ```
61///
62/// # Behavior
63///
64/// - Recursively traverses all subdirectories
65/// - Includes only regular files in size calculation
66/// - Does not follow symbolic links
67/// - Returns 0 for empty directories
68/// - Accumulates sizes using 64-bit integers (supports very large directories)
69///
70/// # Performance
71///
72/// This is a synchronous operation that may take time for large directory trees.
73/// For better performance with large directories, use [`get_directory_size`] which
74/// runs the calculation on a separate thread.
75///
76/// # See Also
77///
78/// - [`get_directory_size`] for async version
79/// - Platform-specific tools may be faster for very large directories
80pub fn dir_size(path: &Path) -> Result<u64> {
81    let mut size = 0;
82
83    for entry in fs::read_dir(path)? {
84        let entry = entry?;
85        let metadata = entry.metadata()?;
86
87        if metadata.is_dir() {
88            size += dir_size(&entry.path())?;
89        } else {
90            size += metadata.len();
91        }
92    }
93
94    Ok(size)
95}
96
97/// Asynchronously calculates the total size of a directory and all its contents.
98///
99/// This is the async version of [`dir_size`] that runs the calculation on a separate
100/// thread to avoid blocking the async runtime. Use this when calculating directory
101/// sizes as part of async operations.
102///
103/// # Arguments
104///
105/// * `path` - The directory to calculate size for
106///
107/// # Returns
108///
109/// The total size in bytes, or an error if the operation fails
110///
111/// # Examples
112///
113/// ```rust,no_run
114/// use agpm_cli::utils::fs::metadata::get_directory_size;
115/// use std::path::Path;
116///
117/// # async fn example() -> anyhow::Result<()> {
118/// let cache_size = get_directory_size(Path::new("~/.agpm/cache")).await?;
119/// println!("Cache size: {} bytes", cache_size);
120/// # Ok(())
121/// # }
122/// ```
123///
124/// # Performance
125///
126/// This function uses `tokio::task::spawn_blocking` to run the directory traversal
127/// on a thread pool, preventing it from blocking other async tasks. This is particularly
128/// useful when:
129/// - Calculating sizes for multiple directories concurrently
130/// - Integrating with async workflows
131/// - Avoiding blocking in async web servers or CLI applications
132///
133/// # See Also
134///
135/// - [`dir_size`] for synchronous version
136pub async fn get_directory_size(path: &Path) -> Result<u64> {
137    let path = path.to_path_buf();
138    tokio::task::spawn_blocking(move || dir_size(&path))
139        .await
140        .context("Failed to join directory size calculation task")?
141}
142
143/// Calculates the SHA-256 checksum of a file.
144///
145/// This function reads the entire file into memory and computes its SHA-256 hash,
146/// returning it as a lowercase hexadecimal string. This is useful for verifying
147/// file integrity and detecting changes.
148///
149/// # Arguments
150///
151/// * `path` - The path to the file to checksum
152///
153/// # Returns
154///
155/// A 64-character lowercase hexadecimal string representing the SHA-256 hash,
156/// or an error if the file cannot be read
157///
158/// # Examples
159///
160/// ```rust,no_run
161/// use agpm_cli::utils::fs::metadata::calculate_checksum;
162/// use std::path::Path;
163///
164/// # fn example() -> anyhow::Result<()> {
165/// let checksum = calculate_checksum(Path::new("important-file.txt"))?;
166/// println!("File checksum: {}", checksum);
167///
168/// // Verify against expected checksum
169/// let expected = "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2";
170/// if checksum == expected {
171///     println!("File integrity verified!");
172/// }
173/// # Ok(())
174/// # }
175/// ```
176///
177/// # Performance
178///
179/// This function reads the entire file into memory, so it may not be suitable
180/// for very large files. For processing multiple files, consider using
181/// [`calculate_checksums_parallel`] for better performance.
182///
183/// # Security
184///
185/// SHA-256 is cryptographically secure and suitable for:
186/// - Integrity verification
187/// - Change detection
188/// - Digital signatures
189/// - Blockchain applications
190///
191/// # See Also
192///
193/// - [`calculate_checksums_parallel`] for batch processing
194/// - [`hex`] crate for hexadecimal encoding
195pub fn calculate_checksum(path: &Path) -> Result<String> {
196    let content = fs::read(path)
197        .with_context(|| format!("Failed to read file for checksum: {}", path.display()))?;
198
199    let mut hasher = Sha256::new();
200    hasher.update(&content);
201    let result = hasher.finalize();
202
203    Ok(hex::encode(result))
204}
205
206/// Calculates SHA-256 checksums for multiple files concurrently.
207///
208/// This function processes multiple files in parallel using Tokio's thread pool,
209/// which can significantly improve performance when processing many files or
210/// large files on systems with multiple CPU cores.
211///
212/// # Arguments
213///
214/// * `paths` - A slice of file paths to process
215///
216/// # Returns
217///
218/// A vector of tuples containing each file path and its corresponding checksum,
219/// in the same order as the input paths. Returns an error if any file fails
220/// to be processed.
221///
222/// # Examples
223///
224/// ```rust,no_run
225/// use agpm_cli::utils::fs::metadata::calculate_checksums_parallel;
226/// use std::path::PathBuf;
227///
228/// # async fn example() -> anyhow::Result<()> {
229/// let files = vec![
230///     PathBuf::from("file1.txt"),
231///     PathBuf::from("file2.txt"),
232///     PathBuf::from("file3.txt"),
233/// ];
234///
235/// let results = calculate_checksums_parallel(&files).await?;
236/// for (path, checksum) in results {
237///     println!("{}: {}", path.display(), checksum);
238/// }
239/// # Ok(())
240/// # }
241/// ```
242///
243/// # Performance
244///
245/// This function uses `tokio::task::spawn_blocking` to run checksum calculations
246/// on separate threads, allowing for true parallelism. Benefits:
247/// - CPU-bound work doesn't block the async runtime
248/// - Multiple files processed simultaneously
249/// - Scales with available CPU cores
250/// - Maintains order of results
251///
252/// # Error Handling
253///
254/// If any file fails to be processed, the entire operation fails and returns
255/// an error with details about all failures. This "all-or-nothing" approach
256/// ensures data consistency.
257///
258/// # See Also
259///
260/// - [`calculate_checksum`] for single file processing
261/// - [`super::parallel::read_files_parallel`] for concurrent file reading
262pub async fn calculate_checksums_parallel(paths: &[PathBuf]) -> Result<Vec<(PathBuf, String)>> {
263    if paths.is_empty() {
264        return Ok(Vec::new());
265    }
266
267    let mut tasks = Vec::new();
268
269    for (index, path) in paths.iter().enumerate() {
270        let path = path.clone();
271        let task = tokio::task::spawn_blocking(move || {
272            calculate_checksum(&path).map(|checksum| (index, path, checksum))
273        });
274        tasks.push(task);
275    }
276
277    let results = try_join_all(tasks).await.context("Failed to join checksum calculation tasks")?;
278
279    let mut successes = Vec::new();
280    let mut errors = Vec::new();
281
282    for result in results {
283        match result {
284            Ok((index, path, checksum)) => successes.push((index, path, checksum)),
285            Err(e) => errors.push(e),
286        }
287    }
288
289    if !errors.is_empty() {
290        let error_msgs: Vec<String> =
291            errors.into_iter().map(|error| format!("  {error}")).collect();
292        return Err(anyhow::anyhow!(
293            "Failed to calculate checksums for {} files:\n{}",
294            error_msgs.len(),
295            error_msgs.join("\n")
296        ));
297    }
298
299    // Sort results by original index to maintain order
300    successes.sort_by_key(|(index, _, _)| *index);
301    let ordered_results: Vec<(PathBuf, String)> =
302        successes.into_iter().map(|(_, path, checksum)| (path, checksum)).collect();
303
304    Ok(ordered_results)
305}
306
/// Checks if a path refers to a regular file that can be opened for reading.
///
/// # Arguments
/// * `path` - The path to check
///
/// # Returns
/// `true` if `path` is a regular file (symlinks are followed) and opening it for
/// reading succeeds; `false` for missing paths, directories, and files that cannot
/// be opened (for example because of insufficient permissions).
pub fn file_exists_and_readable(path: &Path) -> bool {
    // `is_file` is already false for missing paths, so no separate existence check
    // is needed. Reading metadata does not require read permission on the file, so
    // readability is confirmed by actually opening it.
    path.is_file() && fs::File::open(path).is_ok()
}
317
318/// Gets the modification time of a file.
319///
320/// # Arguments
321/// * `path` - The path to the file
322///
323/// # Returns
324/// The modification time as a `SystemTime`
325///
326/// # Errors
327/// Returns an error if the file metadata cannot be read
328pub fn get_modified_time(path: &Path) -> Result<std::time::SystemTime> {
329    let metadata = fs::metadata(path)
330        .with_context(|| format!("Failed to get metadata for: {}", path.display()))?;
331
332    metadata
333        .modified()
334        .with_context(|| format!("Failed to get modification time for: {}", path.display()))
335}
336
337/// Compares the modification times of two files.
338///
339/// # Arguments
340/// * `path1` - The first file path
341/// * `path2` - The second file path
342///
343/// # Returns
344/// - `Ok(Ordering::Less)` if path1 is older than path2
345/// - `Ok(Ordering::Greater)` if path1 is newer than path2
346/// - `Ok(Ordering::Equal)` if they have the same modification time
347///
348/// # Errors
349/// Returns an error if either file's metadata cannot be read
350pub fn compare_file_times(path1: &Path, path2: &Path) -> Result<std::cmp::Ordering> {
351    let time1 = get_modified_time(path1)?;
352    let time2 = get_modified_time(path2)?;
353
354    Ok(time1.cmp(&time2))
355}
356
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A SHA-256 digest rendered as hexadecimal is always 64 characters long.
    const SHA256_HEX_LEN: usize = 64;

    /// Sizes of files in the root and in a nested directory are summed.
    #[test]
    fn test_dir_size() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();

        super::super::dirs::ensure_dir(&root.join("subdir")).unwrap();
        let fixtures =
            [("file1.txt", "12345"), ("file2.txt", "123456789"), ("subdir/file3.txt", "abc")];
        for (relative, contents) in fixtures {
            std::fs::write(root.join(relative), contents).unwrap();
        }

        // 5 + 9 + 3 bytes across the three files.
        assert_eq!(dir_size(root).unwrap(), 17);
    }

    /// A checksum of ordinary content has the expected hexadecimal length.
    #[test]
    fn test_calculate_checksum() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join("checksum_test.txt");
        std::fs::write(&target, "test content").unwrap();

        let digest = calculate_checksum(&target).unwrap();
        assert!(!digest.is_empty());
        assert_eq!(digest.len(), SHA256_HEX_LEN);
    }

    /// Results come back one per input, in input order.
    #[tokio::test]
    async fn test_calculate_checksums_parallel() {
        let workspace = tempdir().unwrap();
        let first = workspace.path().join("file1.txt");
        let second = workspace.path().join("file2.txt");
        std::fs::write(&first, "content1").unwrap();
        std::fs::write(&second, "content2").unwrap();

        let inputs = vec![first, second];
        let outputs = calculate_checksums_parallel(&inputs).await.unwrap();

        assert_eq!(outputs.len(), 2);
        for (expected_path, (actual_path, digest)) in inputs.iter().zip(&outputs) {
            assert_eq!(actual_path, expected_path);
            assert!(!digest.is_empty());
        }
    }

    /// An empty input slice yields an empty result rather than an error.
    #[tokio::test]
    async fn test_calculate_checksums_parallel_empty() {
        let outputs = calculate_checksums_parallel(&[]).await.unwrap();
        assert!(outputs.is_empty());
    }

    /// Empty, missing, and large (1 MiB) inputs.
    #[test]
    fn test_calculate_checksum_edge_cases() {
        let workspace = tempdir().unwrap();
        let base = workspace.path();

        // Empty file: the SHA-256 of zero bytes is a well-known constant.
        let empty_file = base.join("empty.txt");
        std::fs::write(&empty_file, "").unwrap();
        let empty_digest = calculate_checksum(&empty_file).unwrap();
        assert_eq!(empty_digest.len(), SHA256_HEX_LEN);
        assert_eq!(
            empty_digest,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );

        // Non-existent file must surface an error.
        assert!(calculate_checksum(&base.join("nonexistent.txt")).is_err());

        // Large file (1MB)
        let large_file = base.join("large.txt");
        std::fs::write(&large_file, vec![b'a'; 1024 * 1024]).unwrap();
        let large_digest = calculate_checksum(&large_file).unwrap();
        assert_eq!(large_digest.len(), SHA256_HEX_LEN);
    }

    /// One unreadable input fails the whole batch.
    #[tokio::test]
    async fn test_calculate_checksums_parallel_errors() {
        let workspace = tempdir().unwrap();
        let present = workspace.path().join("valid.txt");
        let absent = workspace.path().join("invalid.txt");
        std::fs::write(&present, "content").unwrap();

        let outcome = calculate_checksums_parallel(&[present, absent]).await;

        // Should fail if any file is invalid
        assert!(outcome.is_err());
    }

    /// Empty directories, missing directories, and (on Unix) directories with symlinks.
    #[test]
    fn test_dir_size_edge_cases() {
        let workspace = tempdir().unwrap();
        let base = workspace.path();

        // Empty directory
        let empty_dir = base.join("empty");
        super::super::dirs::ensure_dir(&empty_dir).unwrap();
        assert_eq!(dir_size(&empty_dir).unwrap(), 0);

        // Non-existent directory
        assert!(dir_size(&base.join("nonexistent")).is_err());

        // Directory with symlinks
        #[cfg(unix)]
        {
            let linked_dir = base.join("with_symlink");
            super::super::dirs::ensure_dir(&linked_dir).unwrap();
            std::fs::write(linked_dir.join("file.txt"), "12345").unwrap();

            let link_target = base.join("target");
            std::fs::write(&link_target, "123456789").unwrap();
            std::os::unix::fs::symlink(&link_target, linked_dir.join("link")).unwrap();

            // How symlinks are accounted for is an implementation detail, so only
            // bound the result: at least the real file, and nowhere near gigabytes.
            let measured = dir_size(&linked_dir).unwrap();
            assert!(measured >= 5);
            assert!(measured < 1_000_000);
        }
    }
}