acme_disk_use/
disk_use.rs

1//! High-level disk usage analysis interface combining cache and scanner
2
3use std::{io, path::Path};
4
5use crate::cache::CacheManager;
6use crate::error::DiskUseError;
7use crate::scanner::{self, DirStat};
8
9/// Main interface for disk usage analysis with caching support
10pub struct DiskUse {
11    cache_manager: CacheManager,
12}
13
14impl DiskUse {
15    /// Create a new DiskUse instance with the specified cache file path
16    pub fn new(cache_path: impl AsRef<Path>) -> Self {
17        Self {
18            cache_manager: CacheManager::new(cache_path),
19        }
20    }
21
22    /// Create a new DiskUse instance using the default cache location
23    pub fn new_with_default_cache() -> Self {
24        Self::new(crate::get_default_cache_path())
25    }
26
27    /// Scan a directory and return its total size in bytes
28    ///
29    /// This method automatically:
30    /// - Loads from cache
31    /// - Scans only changed directories
32    /// - Saves the updated cache
33    pub fn scan(&mut self, path: impl AsRef<Path>) -> io::Result<u64> {
34        self.scan_with_options(path, false)
35    }
36
37    /// Scan a directory with options for ignoring cache
38    ///
39    /// # Arguments
40    /// * `path` - The directory path to scan
41    /// * `ignore_cache` - If true, performs a fresh scan without using cache
42    pub fn scan_with_options(
43        &mut self,
44        path: impl AsRef<Path>,
45        ignore_cache: bool,
46    ) -> io::Result<u64> {
47        let path = path.as_ref();
48
49        // Check if path exists first
50        if !path.exists() {
51            return Err(io::Error::from(DiskUseError::PathNotFound {
52                path: path.to_path_buf(),
53            }));
54        }
55
56        // Normalize path to avoid issues with symlinks and /private on macOS
57        let path_buf = match path.canonicalize() {
58            Ok(p) => p,
59            Err(err) => {
60                // If canonicalization fails (e.g., permission denied), use original path
61                if err.kind() == io::ErrorKind::PermissionDenied {
62                    return Err(io::Error::from(DiskUseError::PermissionDenied {
63                        path: path.to_path_buf(),
64                    }));
65                }
66                path.to_path_buf()
67            }
68        };
69
70        // Get existing cache entry for this root (unless ignoring cache)
71        let old_entry = if ignore_cache {
72            None
73        } else {
74            self.cache_manager.get(&path_buf)
75        };
76
77        // Scan the directory (will use cache for unchanged subdirectories)
78        let new_entry = scanner::scan_directory(&path_buf, old_entry)?;
79
80        // Get the total size before potentially moving new_entry
81        let total_size = new_entry.total_size();
82
83        // Update the cache with new results (unless ignoring cache)
84        if !ignore_cache {
85            self.cache_manager.update(&path_buf, new_entry);
86            // Cache will auto-save on drop
87        }
88
89        Ok(total_size)
90    }
91
92    /// Get detailed statistics for a previously scanned path
93    pub fn get_stats(&self, path: impl AsRef<Path>) -> Option<&DirStat> {
94        self.cache_manager.get(path.as_ref())
95    }
96
97    /// Get file count for a path
98    ///
99    /// # Arguments
100    /// * `path` - The path to get file count for
101    /// * `ignore_cache` - If true, counts files directly from filesystem instead of using cache
102    pub fn get_file_count(&self, path: impl AsRef<Path>, ignore_cache: bool) -> io::Result<u64> {
103        if ignore_cache {
104            scanner::count_files(path.as_ref())
105        } else {
106            Ok(self
107                .get_stats(path)
108                .map(|stats| stats.file_count())
109                .unwrap_or(0))
110        }
111    }
112
113    /// Save the current cache to disk
114    pub fn save_cache(&mut self) -> io::Result<()> {
115        self.cache_manager.save()
116    }
117
118    /// Clear all cache contents
119    pub fn clear_cache(&mut self) -> io::Result<()> {
120        self.cache_manager.clear()
121    }
122
123    /// Delete the cache file
124    pub fn delete_cache(&self) -> io::Result<()> {
125        self.cache_manager.delete()
126    }
127
128    /// Get the cache file path
129    pub fn cache_path(&self) -> &Path {
130        self.cache_manager.path()
131    }
132
133    /// Get all cached root directories
134    pub fn get_cached_roots(&self) -> Vec<&DirStat> {
135        self.cache_manager.get_roots()
136    }
137
138    /// Check if the cache is empty
139    pub fn is_cache_empty(&self) -> bool {
140        self.cache_manager.is_empty()
141    }
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a small fixture tree under `base`: five files holding 71 bytes of
    /// content in total, spread over `base`, `subdir1`, `subdir2` and
    /// `subdir2/nested`.
    fn create_test_directory_structure(base: &Path) -> io::Result<()> {
        fs::create_dir_all(base.join("subdir1"))?;
        fs::create_dir_all(base.join("subdir2/nested"))?;

        fs::write(base.join("file1.txt"), "Hello World")?;
        fs::write(base.join("file2.txt"), "Test content")?;
        fs::write(base.join("subdir1/nested_file.txt"), "Nested content here")?;
        fs::write(base.join("subdir2/another.txt"), "More content")?;
        fs::write(base.join("subdir2/nested/deep.txt"), "Deep file content")?;

        Ok(())
    }

    #[test]
    fn test_disk_use_with_cache() -> io::Result<()> {
        // Full `DiskUse` workflow with caching enabled:
        // 1. Scan a directory and save the cache.
        // 2. Create a new `DiskUse` on the same cache file and scan again.
        // 3. Check that the second instance reports the expected size and
        //    file count.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.bin");

        fs::create_dir(&test_dir)?;
        create_test_directory_structure(&test_dir)?;

        let canonical_test_dir = test_dir.canonicalize()?;

        {
            let mut disk_use = DiskUse::new(&cache_file);
            let size1 = disk_use.scan(&canonical_test_dir)?;
            assert!(size1 >= 71);

            // Save explicitly so the test does not depend on drop behavior.
            disk_use.save_cache()?;
        }

        assert!(cache_file.exists());

        {
            let mut disk_use = DiskUse::new(&cache_file);
            let size2 = disk_use.scan(&canonical_test_dir)?;
            assert!(size2 >= 71);

            let file_count = disk_use.get_file_count(&canonical_test_dir, false)?;
            assert_eq!(file_count, 5);
        }

        Ok(())
    }

    #[test]
    fn test_disk_use_ignore_cache() -> io::Result<()> {
        // `ignore_cache` behavior:
        // 1. Scan a directory, populating the cache.
        // 2. Add a file to the directory.
        // 3. Scan again, once through the cache and once with
        //    `ignore_cache = true`.
        // 4. Check that the fresh scan accounts for the added file.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.json");

        fs::create_dir(&test_dir)?;
        create_test_directory_structure(&test_dir)?;

        let mut disk_use = DiskUse::new(&cache_file);

        let size1 = disk_use.scan(&test_dir)?;
        assert!(size1 >= 71);

        fs::write(test_dir.join("new_file.txt"), "New content")?;

        // Cached rescan; only its success matters here.
        disk_use.scan(&test_dir)?;

        // 71 bytes of fixture content plus the 11 bytes just written.
        let size3 = disk_use.scan_with_options(&test_dir, true)?;
        assert!(size3 >= 82);

        Ok(())
    }

    #[test]
    fn test_cache_management() -> io::Result<()> {
        // High-level cache management methods of `DiskUse`:
        // 1. `save_cache()`: explicitly saving the cache.
        // 2. `clear_cache()`: clearing the cache contents.
        // 3. `delete_cache()`: deleting the cache file.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.bin");

        fs::create_dir(&test_dir)?;
        create_test_directory_structure(&test_dir)?;

        {
            let mut disk_use = DiskUse::new(&cache_file);

            disk_use.scan(&test_dir)?;
            disk_use.save_cache()?;
        }

        assert!(cache_file.exists());

        {
            let mut disk_use = DiskUse::new(&cache_file);
            disk_use.clear_cache()?;

            disk_use.delete_cache()?;
        }

        assert!(!cache_file.exists());

        Ok(())
    }

    #[test]
    fn test_get_file_count_subdirectory() -> io::Result<()> {
        // After scanning a parent directory, `get_file_count` must be able to
        // answer for a child directory from the cache alone.
        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test");
        let cache_file = temp_dir.path().join("cache.bin");

        fs::create_dir(&test_dir)?;
        fs::create_dir(test_dir.join("sub"))?;
        fs::write(test_dir.join("sub/file.txt"), "content")?;

        let mut disk_use = DiskUse::new(&cache_file);
        disk_use.scan(&test_dir)?; // Scans /test, should cache /test/sub

        // Ask for /test/sub without rescanning.
        let count = disk_use.get_file_count(test_dir.join("sub"), false)?;
        assert_eq!(count, 1);

        Ok(())
    }

    #[test]
    #[cfg(unix)]
    fn test_compare_with_du() -> io::Result<()> {
        // Compares the library's result with the system `du` command to check
        // that the reported size is in line with what the platform reports.
        use std::process::Command;

        let temp_dir = TempDir::new()?;
        let test_dir = temp_dir.path().join("test_du");
        // Keep the cache inside the temp dir so the test never touches the
        // default cache location.
        let cache_file = temp_dir.path().join("cache.bin");
        fs::create_dir(&test_dir)?;

        // One small file and one 8 KiB file so that several blocks are used.
        fs::write(test_dir.join("file1.txt"), "Hello World")?;
        let large_content = vec![0u8; 8192];
        fs::write(test_dir.join("file2.bin"), &large_content)?;

        let mut disk_use = DiskUse::new(&cache_file);
        let lib_size = disk_use.scan_with_options(&test_dir, true)?;

        // `-k` forces 1024-byte units on both BSD/macOS and GNU `du`, whose
        // default units differ.
        let output = match Command::new("du")
            .arg("-s")
            .arg("-k")
            .arg(&test_dir)
            .output()
        {
            Ok(output) if output.status.success() => output,
            // `du` is unavailable or failed: nothing to compare against, so
            // skip the comparison instead of failing the test.
            _ => return Ok(()),
        };

        let stdout = String::from_utf8_lossy(&output.stdout);
        let du_kblocks: u64 = stdout
            .split_whitespace()
            .next()
            .expect("du printed no output")
            .parse()
            .expect("first column of du output should be an integer");

        let du_bytes = du_kblocks * 1024;

        // `du -k` rounds to whole kilobytes, so the two figures are not
        // expected to match exactly; accept a difference of up to 4 KiB.
        println!("Library size: {}, du size: {}", lib_size, du_bytes);

        let diff = lib_size.abs_diff(du_bytes);

        assert!(
            diff <= 4096,
            "Library size {} differs significantly from du size {}",
            lib_size,
            du_bytes
        );

        Ok(())
    }

    #[test]
    fn test_scan_nonexistent_directory() {
        // Scanning a path that does not exist must return an error that says so.
        let temp_dir = TempDir::new().unwrap();
        let cache_file = temp_dir.path().join("cache.bin");
        let mut disk_use = DiskUse::new(&cache_file);

        // A child of the fresh temp dir that was never created: guaranteed to
        // be missing without relying on a hard-coded absolute path.
        let nonexistent = temp_dir.path().join("path_that_does_not_exist");
        let result = disk_use.scan(&nonexistent);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.to_string().contains("does not exist"),
            "Error should indicate path doesn't exist: {}",
            err
        );
    }
}