Skip to main content

socket_patch_core/utils/
cleanup_blobs.rs

1use std::path::Path;
2
3use crate::manifest::operations::get_after_hash_blobs;
4use crate::manifest::schema::PatchManifest;
5
/// Result of a blob cleanup operation.
///
/// The same shape is produced for real runs and dry runs; in a dry run the
/// "removed" fields describe what *would* have been removed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CleanupResult {
    /// Number of entries listed in the blobs directory (0 when the directory is missing).
    pub blobs_checked: usize,
    /// Number of blob files removed (or that would be removed in a dry run).
    pub blobs_removed: usize,
    /// Combined size in bytes of the removed (or would-be-removed) blob files.
    pub bytes_freed: u64,
    /// File names of the removed (or would-be-removed) blobs.
    pub removed_blobs: Vec<String>,
}
14
15/// Cleans up unused blob files from the blobs directory.
16///
17/// Analyzes the manifest to determine which afterHash blobs are needed for applying patches,
18/// then removes any blob files that are not needed.
19///
20/// Note: beforeHash blobs are considered "unused" because they are downloaded on-demand
21/// during rollback operations. This saves disk space since beforeHash blobs are only
22/// needed for rollback, not for applying patches.
23pub async fn cleanup_unused_blobs(
24    manifest: &PatchManifest,
25    blobs_dir: &Path,
26    dry_run: bool,
27) -> Result<CleanupResult, std::io::Error> {
28    // Only keep afterHash blobs - beforeHash blobs are downloaded on-demand during rollback
29    let used_blobs = get_after_hash_blobs(manifest);
30
31    // Check if blobs directory exists
32    if tokio::fs::metadata(blobs_dir).await.is_err() {
33        // Blobs directory doesn't exist, nothing to clean up
34        return Ok(CleanupResult {
35            blobs_checked: 0,
36            blobs_removed: 0,
37            bytes_freed: 0,
38            removed_blobs: vec![],
39        });
40    }
41
42    // Read all files in the blobs directory
43    let mut read_dir = tokio::fs::read_dir(blobs_dir).await?;
44    let mut blob_entries = Vec::new();
45
46    while let Some(entry) = read_dir.next_entry().await? {
47        blob_entries.push(entry);
48    }
49
50    let mut result = CleanupResult {
51        blobs_checked: blob_entries.len(),
52        blobs_removed: 0,
53        bytes_freed: 0,
54        removed_blobs: vec![],
55    };
56
57    // Check each blob file
58    for entry in &blob_entries {
59        let file_name = entry.file_name();
60        let file_name_str = file_name.to_string_lossy().to_string();
61
62        // Skip hidden files and directories
63        if file_name_str.starts_with('.') {
64            continue;
65        }
66
67        let blob_path = blobs_dir.join(&file_name_str);
68
69        // Check if it's a file (not a directory)
70        let metadata = tokio::fs::metadata(&blob_path).await?;
71        if !metadata.is_file() {
72            continue;
73        }
74
75        // If this blob is not in use, remove it
76        if !used_blobs.contains(&file_name_str) {
77            result.blobs_removed += 1;
78            result.bytes_freed += metadata.len();
79            result.removed_blobs.push(file_name_str);
80
81            if !dry_run {
82                tokio::fs::remove_file(&blob_path).await?;
83            }
84        }
85    }
86
87    Ok(result)
88}
89
90/// Formats the cleanup result for human-readable output.
91pub fn format_cleanup_result(result: &CleanupResult, dry_run: bool) -> String {
92    if result.blobs_checked == 0 {
93        return "No blobs directory found, nothing to clean up.".to_string();
94    }
95
96    if result.blobs_removed == 0 {
97        return format!(
98            "Checked {} blob(s), all are in use.",
99            result.blobs_checked
100        );
101    }
102
103    let action = if dry_run { "Would remove" } else { "Removed" };
104    let bytes_formatted = format_bytes(result.bytes_freed);
105
106    let mut output = format!(
107        "{} {} unused blob(s) ({} freed)",
108        action, result.blobs_removed, bytes_formatted
109    );
110
111    if dry_run && !result.removed_blobs.is_empty() {
112        output.push_str("\nUnused blobs:");
113        for blob in &result.removed_blobs {
114            output.push_str(&format!("\n  - {}", blob));
115        }
116    }
117
118    output
119}
120
/// Formats bytes into a human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"500 B"`). Larger values are scaled by
/// powers of 1024 and printed with two decimals using the largest unit among KB, MB and GB
/// that does not exceed the value (`"1.50 KB"`). GB is the largest unit, so bigger values
/// are still expressed in GB.
pub fn format_bytes(bytes: u64) -> String {
    // Ordered from largest to smallest so the first match is the best-fitting unit.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    ];

    match UNITS.iter().find(|(threshold, _)| bytes >= *threshold) {
        Some((divisor, suffix)) => {
            format!("{:.2} {}", bytes as f64 / *divisor as f64, suffix)
        }
        // Below 1 KB (including zero): exact byte count, no decimals.
        None => format!("{} B", bytes),
    }
}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::manifest::schema::{PatchFileInfo, PatchManifest, PatchRecord};
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};

    const TEST_UUID: &str = "11111111-1111-4111-8111-111111111111";
    const BEFORE_HASH_1: &str =
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111";
    const AFTER_HASH_1: &str =
        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111";
    const BEFORE_HASH_2: &str =
        "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222";
    const AFTER_HASH_2: &str =
        "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222";
    const ORPHAN_HASH: &str =
        "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo";

    /// Builds a manifest with one patch that touches two files, giving two
    /// beforeHash/afterHash pairs.
    fn create_test_manifest() -> PatchManifest {
        let mut files = HashMap::new();
        files.insert(
            "package/index.js".to_string(),
            PatchFileInfo {
                before_hash: BEFORE_HASH_1.to_string(),
                after_hash: AFTER_HASH_1.to_string(),
            },
        );
        files.insert(
            "package/lib/utils.js".to_string(),
            PatchFileInfo {
                before_hash: BEFORE_HASH_2.to_string(),
                after_hash: AFTER_HASH_2.to_string(),
            },
        );

        let mut patches = HashMap::new();
        patches.insert(
            "pkg:npm/pkg-a@1.0.0".to_string(),
            PatchRecord {
                uuid: TEST_UUID.to_string(),
                exported_at: "2024-01-01T00:00:00Z".to_string(),
                files,
                vulnerabilities: HashMap::new(),
                description: "Test patch".to_string(),
                license: "MIT".to_string(),
                tier: "free".to_string(),
            },
        );

        PatchManifest { patches }
    }

    /// Creates a temporary directory containing an empty `blobs` subdirectory.
    ///
    /// The returned guard must stay alive for the whole test; dropping it deletes the tree.
    async fn create_blobs_dir() -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let blobs_dir = dir.path().join("blobs");
        tokio::fs::create_dir_all(&blobs_dir).await.unwrap();
        (dir, blobs_dir)
    }

    /// Writes a blob file called `name` with the given content.
    async fn write_blob(blobs_dir: &Path, name: &str, content: &str) {
        tokio::fs::write(blobs_dir.join(name), content)
            .await
            .unwrap();
    }

    /// Reports whether `name` still exists inside `blobs_dir`.
    async fn blob_exists(blobs_dir: &Path, name: &str) -> bool {
        tokio::fs::metadata(blobs_dir.join(name)).await.is_ok()
    }

    #[tokio::test]
    async fn test_cleanup_keeps_after_hash_removes_orphan() {
        let (_tmp, blobs_dir) = create_blobs_dir().await;
        let manifest = create_test_manifest();

        // Create blobs on disk
        write_blob(&blobs_dir, AFTER_HASH_1, "after content 1").await;
        write_blob(&blobs_dir, AFTER_HASH_2, "after content 2").await;
        write_blob(&blobs_dir, ORPHAN_HASH, "orphan content").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        // Should remove only the orphan blob
        assert_eq!(result.blobs_checked, 3);
        assert_eq!(result.blobs_removed, 1);
        assert_eq!(result.bytes_freed, "orphan content".len() as u64);
        assert!(result.removed_blobs.contains(&ORPHAN_HASH.to_string()));

        // afterHash blobs should still exist
        assert!(blob_exists(&blobs_dir, AFTER_HASH_1).await);
        assert!(blob_exists(&blobs_dir, AFTER_HASH_2).await);

        // Orphan blob should be removed
        assert!(!blob_exists(&blobs_dir, ORPHAN_HASH).await);
    }

    #[tokio::test]
    async fn test_cleanup_removes_before_hash_blobs() {
        let (_tmp, blobs_dir) = create_blobs_dir().await;
        let manifest = create_test_manifest();

        // Create both beforeHash and afterHash blobs
        write_blob(&blobs_dir, BEFORE_HASH_1, "before content 1").await;
        write_blob(&blobs_dir, BEFORE_HASH_2, "before content 2").await;
        write_blob(&blobs_dir, AFTER_HASH_1, "after content 1").await;
        write_blob(&blobs_dir, AFTER_HASH_2, "after content 2").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        // Should remove the beforeHash blobs
        assert_eq!(result.blobs_removed, 2);
        assert!(result.removed_blobs.contains(&BEFORE_HASH_1.to_string()));
        assert!(result.removed_blobs.contains(&BEFORE_HASH_2.to_string()));

        // afterHash blobs should still exist
        assert!(blob_exists(&blobs_dir, AFTER_HASH_1).await);
        assert!(blob_exists(&blobs_dir, AFTER_HASH_2).await);

        // beforeHash blobs should be removed
        assert!(!blob_exists(&blobs_dir, BEFORE_HASH_1).await);
        assert!(!blob_exists(&blobs_dir, BEFORE_HASH_2).await);
    }

    #[tokio::test]
    async fn test_cleanup_dry_run_does_not_delete() {
        let (_tmp, blobs_dir) = create_blobs_dir().await;
        let manifest = create_test_manifest();

        write_blob(&blobs_dir, BEFORE_HASH_1, "before content 1").await;
        write_blob(&blobs_dir, AFTER_HASH_1, "after content 1").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, true)
            .await
            .unwrap();

        // Should report beforeHash as would-be-removed
        assert_eq!(result.blobs_removed, 1);
        assert!(result.removed_blobs.contains(&BEFORE_HASH_1.to_string()));

        // But both blobs should still exist
        assert!(blob_exists(&blobs_dir, BEFORE_HASH_1).await);
        assert!(blob_exists(&blobs_dir, AFTER_HASH_1).await);
    }

    #[tokio::test]
    async fn test_cleanup_empty_manifest_removes_all() {
        let (_tmp, blobs_dir) = create_blobs_dir().await;
        let manifest = PatchManifest::new();

        write_blob(&blobs_dir, AFTER_HASH_1, "content 1").await;
        write_blob(&blobs_dir, BEFORE_HASH_1, "content 2").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_removed, 2);
    }

    #[tokio::test]
    async fn test_cleanup_skips_hidden_files_and_directories() {
        let (_tmp, blobs_dir) = create_blobs_dir().await;
        let manifest = create_test_manifest();

        // Neither a dot-file nor a subdirectory is a blob, even though the manifest
        // does not reference them.
        write_blob(&blobs_dir, ".gitkeep", "").await;
        tokio::fs::create_dir_all(blobs_dir.join("nested"))
            .await
            .unwrap();
        write_blob(&blobs_dir, AFTER_HASH_1, "after content 1").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_removed, 0);
        assert_eq!(result.bytes_freed, 0);
        assert!(result.removed_blobs.is_empty());

        assert!(blob_exists(&blobs_dir, ".gitkeep").await);
        assert!(blob_exists(&blobs_dir, "nested").await);
        assert!(blob_exists(&blobs_dir, AFTER_HASH_1).await);
    }

    #[tokio::test]
    async fn test_cleanup_nonexistent_blobs_dir() {
        let dir = tempfile::tempdir().unwrap();
        let non_existent = dir.path().join("non-existent");

        let manifest = create_test_manifest();

        let result = cleanup_unused_blobs(&manifest, &non_existent, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_checked, 0);
        assert_eq!(result.blobs_removed, 0);
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(0), "0 B");
        assert_eq!(format_bytes(500), "500 B");
        assert_eq!(format_bytes(1023), "1023 B");
        assert_eq!(format_bytes(1024), "1.00 KB");
        assert_eq!(format_bytes(1536), "1.50 KB");
        assert_eq!(format_bytes(1048576), "1.00 MB");
        assert_eq!(format_bytes(1073741824), "1.00 GB");
    }

    #[test]
    fn test_format_cleanup_result_no_blobs_dir() {
        let result = CleanupResult {
            blobs_checked: 0,
            blobs_removed: 0,
            bytes_freed: 0,
            removed_blobs: vec![],
        };
        assert_eq!(
            format_cleanup_result(&result, false),
            "No blobs directory found, nothing to clean up."
        );
    }

    #[test]
    fn test_format_cleanup_result_all_in_use() {
        let result = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 0,
            bytes_freed: 0,
            removed_blobs: vec![],
        };
        assert_eq!(
            format_cleanup_result(&result, false),
            "Checked 5 blob(s), all are in use."
        );
    }

    #[test]
    fn test_format_cleanup_result_removed() {
        let result = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 2,
            bytes_freed: 2048,
            removed_blobs: vec!["aaa".to_string(), "bbb".to_string()],
        };
        assert_eq!(
            format_cleanup_result(&result, false),
            "Removed 2 unused blob(s) (2.00 KB freed)"
        );
    }

    #[test]
    fn test_format_cleanup_result_dry_run_lists_blobs() {
        let result = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 2,
            bytes_freed: 2048,
            removed_blobs: vec!["aaa".to_string(), "bbb".to_string()],
        };
        let formatted = format_cleanup_result(&result, true);
        assert!(formatted.starts_with("Would remove 2 unused blob(s)"));
        assert!(formatted.contains("Unused blobs:"));
        assert!(formatted.contains("  - aaa"));
        assert!(formatted.contains("  - bbb"));
    }
}