acme_disk_use/
disk_use.rs

1//! High-level disk usage analysis interface combining cache and scanner
2
3use std::{io, path::Path};
4
5use crate::cache::CacheManager;
6use crate::scanner::{self, DirStat};
7
8/// Main interface for disk usage analysis with caching support
9pub struct DiskUse {
10    cache_manager: CacheManager,
11}
12
13impl DiskUse {
14    /// Create a new DiskUse instance with the specified cache file path
15    pub fn new(cache_path: impl AsRef<Path>) -> Self {
16        Self {
17            cache_manager: CacheManager::new(cache_path),
18        }
19    }
20
21    /// Create a new DiskUse instance using the default cache location
22    pub fn new_with_default_cache() -> Self {
23        Self::new(crate::get_default_cache_path())
24    }
25
26    /// Scan a directory and return its total size in bytes
27    ///
28    /// This method automatically:
29    /// - Loads from cache
30    /// - Scans only changed directories
31    /// - Saves the updated cache
32    pub fn scan(&mut self, path: impl AsRef<Path>) -> io::Result<u64> {
33        self.scan_with_options(path, false)
34    }
35
36    /// Scan a directory with options for ignoring cache
37    ///
38    /// # Arguments
39    /// * `path` - The directory path to scan
40    /// * `ignore_cache` - If true, performs a fresh scan without using cache
41    pub fn scan_with_options(
42        &mut self,
43        path: impl AsRef<Path>,
44        ignore_cache: bool,
45    ) -> io::Result<u64> {
46        let path = path.as_ref();
47
48        // Normalize path to avoid issues with symlinks and /private on macOS
49        let path_buf = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
50
51        // Get existing cache entry for this root (unless ignoring cache)
52        let old_entry = if ignore_cache {
53            None
54        } else {
55            self.cache_manager.get(&path_buf)
56        };
57
58        // Scan the directory (will use cache for unchanged subdirectories)
59        let new_entry = scanner::scan_directory(&path_buf, old_entry)?;
60
61        // Get the total size before potentially moving new_entry
62        let total_size = new_entry.total_size();
63
64        // Update the cache with new results (unless ignoring cache)
65        if !ignore_cache {
66            self.cache_manager.update(&path_buf, new_entry);
67            // Cache will auto-save on drop
68        }
69
70        Ok(total_size)
71    }
72
73    /// Get detailed statistics for a previously scanned path
74    pub fn get_stats(&self, path: impl AsRef<Path>) -> Option<&DirStat> {
75        self.cache_manager.get(path.as_ref())
76    }
77
78    /// Get file count for a path
79    ///
80    /// # Arguments
81    /// * `path` - The path to get file count for
82    /// * `ignore_cache` - If true, counts files directly from filesystem instead of using cache
83    pub fn get_file_count(&self, path: impl AsRef<Path>, ignore_cache: bool) -> io::Result<u64> {
84        if ignore_cache {
85            scanner::count_files(path.as_ref())
86        } else {
87            Ok(self
88                .get_stats(path)
89                .map(|stats| stats.file_count())
90                .unwrap_or(0))
91        }
92    }
93
94    /// Save the current cache to disk
95    pub fn save_cache(&mut self) -> io::Result<()> {
96        self.cache_manager.save()
97    }
98
99    /// Clear all cache contents
100    pub fn clear_cache(&mut self) -> io::Result<()> {
101        self.cache_manager.clear()
102    }
103
104    /// Delete the cache file
105    pub fn delete_cache(&self) -> io::Result<()> {
106        self.cache_manager.delete()
107    }
108
109    /// Get the cache file path
110    pub fn cache_path(&self) -> &Path {
111        self.cache_manager.path()
112    }
113}
114
115#[cfg(test)]
116mod tests {
117    use super::*;
118    use std::fs;
119    use tempfile::TempDir;
120
121    fn create_test_directory_structure(base: &Path) -> io::Result<()> {
122        fs::create_dir_all(base.join("subdir1"))?;
123        fs::create_dir_all(base.join("subdir2/nested"))?;
124
125        fs::write(base.join("file1.txt"), "Hello World")?;
126        fs::write(base.join("file2.txt"), "Test content")?;
127        fs::write(base.join("subdir1/nested_file.txt"), "Nested content here")?;
128        fs::write(base.join("subdir2/another.txt"), "More content")?;
129        fs::write(base.join("subdir2/nested/deep.txt"), "Deep file content")?;
130
131        Ok(())
132    }
133
134    #[test]
135    fn test_disk_use_with_cache() -> io::Result<()> {
136        // This test verifies the full `DiskUse` workflow with caching enabled.
137        // 1. It scans a directory and saves the cache.
138        // 2. It creates a new `DiskUse` instance and scans again.
139        // 3. It verifies that the second scan returns the correct size and file count
140        //    (which should be retrieved from the cache).
141        let temp_dir = TempDir::new()?;
142        let test_dir = temp_dir.path().join("test");
143        let cache_file = temp_dir.path().join("cache.bin");
144
145        fs::create_dir(&test_dir)?;
146        create_test_directory_structure(&test_dir)?;
147
148        let canonical_test_dir = test_dir.canonicalize()?;
149
150        {
151            let mut disk_use = DiskUse::new(&cache_file);
152            let size1 = disk_use.scan(&canonical_test_dir)?;
153            assert!(size1 >= 71);
154
155            // Force save by explicitly calling save_cache
156            disk_use.save_cache()?;
157        } // Drop happens here, ensuring save
158
159        assert!(cache_file.exists());
160
161        {
162            let mut disk_use = DiskUse::new(&cache_file);
163            let _size2 = disk_use.scan(&canonical_test_dir)?;
164            assert!(_size2 >= 71);
165
166            let file_count = disk_use.get_file_count(&canonical_test_dir, false)?;
167            assert_eq!(file_count, 5);
168        }
169
170        Ok(())
171    }
172
173    #[test]
174    fn test_disk_use_ignore_cache() -> io::Result<()> {
175        // This test verifies the `ignore_cache` functionality.
176        // 1. It scans a directory and populates the cache.
177        // 2. It modifies the directory (adds a file).
178        // 3. It scans again with `ignore_cache = true`.
179        // 4. It verifies that the scan result reflects the change, ignoring the stale cache.
180        let temp_dir = TempDir::new()?;
181        let test_dir = temp_dir.path().join("test");
182        let cache_file = temp_dir.path().join("cache.json");
183
184        fs::create_dir(&test_dir)?;
185        create_test_directory_structure(&test_dir)?;
186
187        let mut disk_use = DiskUse::new(&cache_file);
188
189        let size1 = disk_use.scan(&test_dir)?;
190        assert!(size1 >= 71);
191
192        fs::write(test_dir.join("new_file.txt"), "New content")?;
193
194        let _size2 = disk_use.scan(&test_dir)?;
195
196        let size3 = disk_use.scan_with_options(&test_dir, true)?;
197        assert!(size3 >= 82);
198
199        Ok(())
200    }
201
202    #[test]
203    fn test_cache_management() -> io::Result<()> {
204        // This test verifies the high-level cache management methods of `DiskUse`:
205        // 1. `save_cache()`: Explicitly saving the cache.
206        // 2. `clear_cache()`: Clearing the in-memory cache.
207        // 3. `delete_cache()`: Deleting the cache file.
208        let temp_dir = TempDir::new()?;
209        let test_dir = temp_dir.path().join("test");
210        let cache_file = temp_dir.path().join("cache.bin");
211
212        fs::create_dir(&test_dir)?;
213        create_test_directory_structure(&test_dir)?;
214
215        {
216            let mut disk_use = DiskUse::new(&cache_file);
217
218            disk_use.scan(&test_dir)?;
219            disk_use.save_cache()?; // Explicit save
220        } // Drop saves too
221
222        assert!(cache_file.exists());
223
224        {
225            let mut disk_use = DiskUse::new(&cache_file);
226            disk_use.clear_cache()?;
227
228            disk_use.delete_cache()?;
229        }
230
231        assert!(!cache_file.exists());
232
233        Ok(())
234    }
235
236    #[test]
237    fn test_get_file_count_subdirectory() -> io::Result<()> {
238        // This test verifies that `get_file_count` correctly retrieves the file count
239        // for a subdirectory from the cache, without needing to re-scan the filesystem.
240        // It scans a parent directory, then requests the count for a child directory.
241        let temp_dir = TempDir::new()?;
242        let test_dir = temp_dir.path().join("test");
243        let cache_file = temp_dir.path().join("cache.bin");
244
245        fs::create_dir(&test_dir)?;
246        fs::create_dir(test_dir.join("sub"))?;
247        fs::write(test_dir.join("sub/file.txt"), "content")?;
248
249        let mut disk_use = DiskUse::new(&cache_file);
250        disk_use.scan(&test_dir)?; // Scans /test, should cache /test/sub
251
252        // Try to get count for /test/sub from cache
253        let count = disk_use.get_file_count(test_dir.join("sub"), false)?;
254        assert_eq!(count, 1);
255
256        Ok(())
257    }
258
259    #[test]
260    #[cfg(unix)]
261    fn test_compare_with_du() -> io::Result<()> {
262        // This test compares the library output with the system `du` command.
263        // It ensures that our block-based size calculation matches the system's.
264        use std::process::Command;
265
266        let temp_dir = TempDir::new()?;
267        let test_dir = temp_dir.path().join("test_du");
268        fs::create_dir(&test_dir)?;
269
270        // Create some files with known content
271        fs::write(test_dir.join("file1.txt"), "Hello World")?; // Small file
272                                                               // Create a larger file to ensure multiple blocks
273        let large_content = vec![0u8; 8192]; // 8KB
274        fs::write(test_dir.join("file2.bin"), &large_content)?;
275
276        let mut disk_use = DiskUse::new_with_default_cache();
277        let lib_size = disk_use.scan_with_options(&test_dir, true)?;
278
279        // Run `du -s -k` (kilobytes) and convert to bytes
280        // Note: macOS du -s uses 512-byte blocks by default, but -k forces 1024-byte blocks.
281        // However, our library uses 512-byte blocks.
282        // Let's use `du -s` which returns 512-byte blocks on macOS/BSD and usually 1024 on GNU/Linux.
283        // To be safe, let's use `du -k` and multiply by 1024, but precision might be lost.
284        // Better: use `du -B1` on GNU or just check if it's close enough.
285
286        // Actually, let's try to match exact block count if possible.
287        // On macOS: `du -s` returns 512-byte blocks.
288        // On Linux: `du -s` usually returns 1024-byte blocks (check BLOCK_SIZE env).
289
290        let output = Command::new("du")
291            .arg("-s")
292            .arg("-k") // Force 1024-byte blocks for consistency across platforms
293            .arg(&test_dir)
294            .output()?;
295
296        if !output.status.success() {
297            // If du fails (e.g. not found), skip the test
298            return Ok(());
299        }
300
301        let stdout = String::from_utf8_lossy(&output.stdout);
302        let du_kblocks: u64 = stdout.split_whitespace().next().unwrap().parse().unwrap();
303
304        let du_bytes = du_kblocks * 1024;
305
306        // Allow for some small difference due to block alignment/metadata
307        // But ideally they should be very close.
308        // Since `du -k` rounds up to nearest 1024, and we sum up 512-byte blocks,
309        // our result might be slightly different but comparable.
310
311        // Let's just print them for now and assert they are within a reasonable margin (e.g. 4KB)
312        println!("Library size: {}, du size: {}", lib_size, du_bytes);
313
314        let diff = lib_size.abs_diff(du_bytes);
315
316        assert!(
317            diff <= 4096,
318            "Library size {} differs significantly from du size {}",
319            lib_size,
320            du_bytes
321        );
322
323        Ok(())
324    }
325}