acme_disk_use/
disk_use.rs

1//! High-level disk usage analysis interface combining cache and scanner
2
3use std::{io, path::Path};
4
5use crate::cache::CacheManager;
6use crate::error::DiskUseError;
7use crate::scanner::{self, DirStat};
8
9/// Main interface for disk usage analysis with caching support
10pub struct DiskUse {
11    cache_manager: CacheManager,
12}
13
14impl DiskUse {
15    /// Create a new DiskUse instance with the specified cache file path
16    pub fn new(cache_path: impl AsRef<Path>) -> Self {
17        Self {
18            cache_manager: CacheManager::new(cache_path),
19        }
20    }
21
22    /// Create a new DiskUse instance using the default cache location
23    pub fn new_with_default_cache() -> Self {
24        Self::new(crate::get_default_cache_path())
25    }
26
27    /// Scan a directory and return its total size in bytes
28    ///
29    /// This method automatically:
30    /// - Loads from cache
31    /// - Scans only changed directories
32    /// - Saves the updated cache
33    pub fn scan(&mut self, path: impl AsRef<Path>) -> io::Result<u64> {
34        self.scan_with_options(path, false)
35    }
36
37    /// Scan a directory with options for ignoring cache
38    ///
39    /// # Arguments
40    /// * `path` - The directory path to scan
41    /// * `ignore_cache` - If true, performs a fresh scan without using cache
42    pub fn scan_with_options(
43        &mut self,
44        path: impl AsRef<Path>,
45        ignore_cache: bool,
46    ) -> io::Result<u64> {
47        let path = path.as_ref();
48
49        // Check if path exists first
50        if !path.exists() {
51            return Err(io::Error::from(DiskUseError::PathNotFound {
52                path: path.to_path_buf(),
53            }));
54        }
55
56        // Normalize path to avoid issues with symlinks and /private on macOS
57        let path_buf = match path.canonicalize() {
58            Ok(p) => p,
59            Err(err) => {
60                // If canonicalization fails (e.g., permission denied), use original path
61                if err.kind() == io::ErrorKind::PermissionDenied {
62                    return Err(io::Error::from(DiskUseError::PermissionDenied {
63                        path: path.to_path_buf(),
64                    }));
65                }
66                path.to_path_buf()
67            }
68        };
69
70        // Get existing cache entry for this root (unless ignoring cache)
71        let old_entry = if ignore_cache {
72            None
73        } else {
74            self.cache_manager.get(&path_buf)
75        };
76
77        // Scan the directory (will use cache for unchanged subdirectories)
78        let new_entry = scanner::scan_directory(&path_buf, old_entry)?;
79
80        // Get the total size before potentially moving new_entry
81        let total_size = new_entry.total_size();
82
83        // Update the cache with new results (unless ignoring cache)
84        if !ignore_cache {
85            self.cache_manager.update(&path_buf, new_entry);
86            // Cache will auto-save on drop
87        }
88
89        Ok(total_size)
90    }
91
92    /// Get detailed statistics for a previously scanned path
93    pub fn get_stats(&self, path: impl AsRef<Path>) -> Option<&DirStat> {
94        self.cache_manager.get(path.as_ref())
95    }
96
97    /// Get file count for a path
98    ///
99    /// # Arguments
100    /// * `path` - The path to get file count for
101    /// * `ignore_cache` - If true, counts files directly from filesystem instead of using cache
102    pub fn get_file_count(&self, path: impl AsRef<Path>, ignore_cache: bool) -> io::Result<u64> {
103        if ignore_cache {
104            scanner::count_files(path.as_ref())
105        } else {
106            Ok(self
107                .get_stats(path)
108                .map(|stats| stats.file_count())
109                .unwrap_or(0))
110        }
111    }
112
113    /// Save the current cache to disk
114    pub fn save_cache(&mut self) -> io::Result<()> {
115        self.cache_manager.save()
116    }
117
118    /// Clear all cache contents
119    pub fn clear_cache(&mut self) -> io::Result<()> {
120        self.cache_manager.clear()
121    }
122
123    /// Delete the cache file
124    pub fn delete_cache(&self) -> io::Result<()> {
125        self.cache_manager.delete()
126    }
127
128    /// Get the cache file path
129    pub fn cache_path(&self) -> &Path {
130        self.cache_manager.path()
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Combined byte length of the five files written by
    /// `create_test_directory_structure` (11 + 12 + 19 + 12 + 17).
    const FIXTURE_CONTENT_BYTES: u64 = 71;

    /// Populate `base` with a small fixed tree: two files at the top level,
    /// one in `subdir1/`, one in `subdir2/` and one in `subdir2/nested/`.
    fn create_test_directory_structure(base: &Path) -> io::Result<()> {
        for dir in ["subdir1", "subdir2/nested"] {
            fs::create_dir_all(base.join(dir))?;
        }

        let files = [
            ("file1.txt", "Hello World"),
            ("file2.txt", "Test content"),
            ("subdir1/nested_file.txt", "Nested content here"),
            ("subdir2/another.txt", "More content"),
            ("subdir2/nested/deep.txt", "Deep file content"),
        ];
        for (relative, content) in files {
            fs::write(base.join(relative), content)?;
        }

        Ok(())
    }

    #[test]
    fn test_disk_use_with_cache() -> io::Result<()> {
        // This test verifies the full `DiskUse` workflow with caching enabled.
        // 1. It scans a directory and saves the cache.
        // 2. It creates a new `DiskUse` instance on the same cache file and scans again.
        // 3. It verifies that the second scan returns a plausible size and the
        //    expected file count.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.bin");

        fs::create_dir(&test_dir)?;
        create_test_directory_structure(&test_dir)?;

        let canonical_test_dir = test_dir.canonicalize()?;

        {
            let mut disk_use = DiskUse::new(&cache_file);
            let size1 = disk_use.scan(&canonical_test_dir)?;
            assert!(size1 >= FIXTURE_CONTENT_BYTES);

            // Save explicitly so the assertion below does not depend on drop behavior
            disk_use.save_cache()?;
        }

        assert!(cache_file.exists());

        {
            let mut disk_use = DiskUse::new(&cache_file);
            let size2 = disk_use.scan(&canonical_test_dir)?;
            assert!(size2 >= FIXTURE_CONTENT_BYTES);

            let file_count = disk_use.get_file_count(&canonical_test_dir, false)?;
            assert_eq!(file_count, 5);
        }

        Ok(())
    }

    #[test]
    fn test_disk_use_ignore_cache() -> io::Result<()> {
        // This test verifies the `ignore_cache` functionality.
        // 1. It scans a directory and populates the cache.
        // 2. It modifies the directory (adds a file).
        // 3. It scans again with `ignore_cache = true`.
        // 4. It verifies that the scan result accounts for the added file.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.json");

        fs::create_dir(&test_dir)?;
        create_test_directory_structure(&test_dir)?;

        let mut disk_use = DiskUse::new(&cache_file);

        let size1 = disk_use.scan(&test_dir)?;
        assert!(size1 >= FIXTURE_CONTENT_BYTES);

        let new_content = "New content";
        fs::write(test_dir.join("new_file.txt"), new_content)?;

        // A cached re-scan must still succeed after the modification
        disk_use.scan(&test_dir)?;

        let size3 = disk_use.scan_with_options(&test_dir, true)?;
        assert!(size3 >= FIXTURE_CONTENT_BYTES + new_content.len() as u64);

        Ok(())
    }

    #[test]
    fn test_cache_management() -> io::Result<()> {
        // This test verifies the high-level cache management methods of `DiskUse`:
        // 1. `save_cache()`: Explicitly saving the cache.
        // 2. `clear_cache()`: Clearing the cache contents.
        // 3. `delete_cache()`: Deleting the cache file.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.bin");

        fs::create_dir(&test_dir)?;
        create_test_directory_structure(&test_dir)?;

        {
            let mut disk_use = DiskUse::new(&cache_file);

            disk_use.scan(&test_dir)?;
            disk_use.save_cache()?;
        }

        assert!(cache_file.exists());

        {
            let mut disk_use = DiskUse::new(&cache_file);
            disk_use.clear_cache()?;

            disk_use.delete_cache()?;
        }

        assert!(!cache_file.exists());

        Ok(())
    }

    #[test]
    fn test_get_file_count_subdirectory() -> io::Result<()> {
        // This test verifies that `get_file_count` retrieves the file count for a
        // subdirectory from the cache after only the parent directory was scanned.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.bin");

        fs::create_dir(&test_dir)?;
        fs::create_dir(test_dir.join("sub"))?;
        fs::write(test_dir.join("sub/file.txt"), "content")?;

        let mut disk_use = DiskUse::new(&cache_file);
        disk_use.scan(&test_dir)?; // Scans /test, should cache /test/sub

        // Ask the cache (not the filesystem) for /test/sub
        let count = disk_use.get_file_count(test_dir.join("sub"), false)?;
        assert_eq!(count, 1);

        Ok(())
    }

    #[test]
    #[cfg(unix)]
    fn test_compare_with_du() -> io::Result<()> {
        // This test compares the library output with the system `du` command and
        // requires the two to agree within a small margin.
        use std::process::Command;

        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test_du");
        // Keep the cache inside the temp dir so the test never touches the
        // user's default cache location
        let cache_file = temp_dir.path().join("cache.bin");
        fs::create_dir(&test_dir)?;

        // One small file plus an 8 KiB file so more than one block is in play
        fs::write(test_dir.join("file1.txt"), "Hello World")?;
        fs::write(test_dir.join("file2.bin"), vec![0u8; 8192])?;

        let mut disk_use = DiskUse::new(&cache_file);
        let lib_size = disk_use.scan_with_options(&test_dir, true)?;

        // `du -s` reports in platform-dependent units (512-byte blocks on
        // macOS/BSD, usually 1024 on GNU/Linux); `-k` forces 1024-byte units
        // everywhere so the conversion below is portable.
        let du_result = Command::new("du")
            .arg("-s")
            .arg("-k")
            .arg(&test_dir)
            .output();

        // Skip the comparison when `du` is unavailable or exits with an error
        let output = match du_result {
            Ok(output) if output.status.success() => output,
            _ => return Ok(()),
        };

        let stdout = String::from_utf8_lossy(&output.stdout);
        let du_kblocks: u64 = stdout
            .split_whitespace()
            .next()
            .and_then(|field| field.parse().ok())
            .expect("`du -s -k` should print the size as its first field");

        let du_bytes = du_kblocks * 1024;

        // `du -k` rounds to whole kilobytes, so exact equality is not expected;
        // a 4 KiB tolerance absorbs rounding and block-alignment differences.
        println!("Library size: {}, du size: {}", lib_size, du_bytes);

        let diff = lib_size.abs_diff(du_bytes);

        assert!(
            diff <= 4096,
            "Library size {} differs significantly from du size {}",
            lib_size,
            du_bytes
        );

        Ok(())
    }

    #[test]
    fn test_scan_nonexistent_directory() {
        // Test that scanning a nonexistent directory returns an appropriate error
        let temp_dir = TempDir::new().unwrap();
        let cache_file = temp_dir.path().join("cache.bin");
        let mut disk_use = DiskUse::new(&cache_file);

        // A fresh temp dir is empty, so this child is guaranteed not to exist
        let nonexistent = temp_dir.path().join("path_that_does_not_exist");
        let result = disk_use.scan(&nonexistent);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.to_string().contains("does not exist"),
            "Error should indicate path doesn't exist: {}",
            err
        );
    }
}