Skip to main content

socket_patch_core/utils/
cleanup_blobs.rs

1use std::collections::HashSet;
2use std::path::Path;
3
4use crate::manifest::operations::get_after_hash_blobs;
5use crate::manifest::schema::PatchManifest;
6
/// Outcome of a cleanup pass over one socket-patch storage directory.
///
/// The field names talk about "blobs", but the same struct is also returned
/// by [`cleanup_unused_archives`], in which case each entry is an archive file.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CleanupResult {
    /// Number of entries examined in the target directory
    /// (zero when the directory is missing).
    pub blobs_checked: usize,
    /// Number of files removed, or that would be removed in a dry run.
    pub blobs_removed: usize,
    /// Combined size in bytes of the removed (or would-be-removed) files.
    pub bytes_freed: u64,
    /// File names of the removed (or would-be-removed) files.
    pub removed_blobs: Vec<String>,
}
15
16/// Cleans up unused blob files from the blobs directory.
17///
18/// Analyzes the manifest to determine which afterHash blobs are needed for applying patches,
19/// then removes any blob files that are not needed.
20///
21/// Note: beforeHash blobs are considered "unused" because they are downloaded on-demand
22/// during rollback operations. This saves disk space since beforeHash blobs are only
23/// needed for rollback, not for applying patches.
24pub async fn cleanup_unused_blobs(
25    manifest: &PatchManifest,
26    blobs_dir: &Path,
27    dry_run: bool,
28) -> Result<CleanupResult, std::io::Error> {
29    // Only keep afterHash blobs - beforeHash blobs are downloaded on-demand during rollback
30    let used_blobs = get_after_hash_blobs(manifest);
31
32    // Check if blobs directory exists
33    if tokio::fs::metadata(blobs_dir).await.is_err() {
34        // Blobs directory doesn't exist, nothing to clean up
35        return Ok(CleanupResult {
36            blobs_checked: 0,
37            blobs_removed: 0,
38            bytes_freed: 0,
39            removed_blobs: vec![],
40        });
41    }
42
43    // Read all files in the blobs directory
44    let mut read_dir = tokio::fs::read_dir(blobs_dir).await?;
45    let mut blob_entries = Vec::new();
46
47    while let Some(entry) = read_dir.next_entry().await? {
48        blob_entries.push(entry);
49    }
50
51    let mut result = CleanupResult {
52        blobs_checked: blob_entries.len(),
53        blobs_removed: 0,
54        bytes_freed: 0,
55        removed_blobs: vec![],
56    };
57
58    // Check each blob file
59    for entry in &blob_entries {
60        let file_name = entry.file_name();
61        let file_name_str = file_name.to_string_lossy().to_string();
62
63        // Skip hidden files and directories
64        if file_name_str.starts_with('.') {
65            continue;
66        }
67
68        let blob_path = blobs_dir.join(&file_name_str);
69
70        // Check if it's a file (not a directory)
71        let metadata = tokio::fs::metadata(&blob_path).await?;
72        if !metadata.is_file() {
73            continue;
74        }
75
76        // If this blob is not in use, remove it
77        if !used_blobs.contains(&file_name_str) {
78            result.blobs_removed += 1;
79            result.bytes_freed += metadata.len();
80            result.removed_blobs.push(file_name_str);
81
82            if !dry_run {
83                tokio::fs::remove_file(&blob_path).await?;
84            }
85        }
86    }
87
88    Ok(result)
89}
90
91/// Cleans up unused per-patch archive files from `archives_dir`.
92///
93/// Archives are named `<patch_uuid>.tar.gz`. Any file matching that
94/// pattern whose UUID is not present in the manifest is removed. Files
95/// that do *not* end in `.tar.gz` are treated as orphans and also
96/// removed — these directories are managed exclusively by socket-patch,
97/// so any stray non-archive file is assumed to be left over from an
98/// older socket-patch version. Subdirectories and hidden files are
99/// left untouched.
100pub async fn cleanup_unused_archives(
101    manifest: &PatchManifest,
102    archives_dir: &Path,
103    dry_run: bool,
104) -> Result<CleanupResult, std::io::Error> {
105    let used_uuids: HashSet<String> = manifest
106        .patches
107        .values()
108        .map(|r| r.uuid.clone())
109        .collect();
110
111    if tokio::fs::metadata(archives_dir).await.is_err() {
112        return Ok(CleanupResult {
113            blobs_checked: 0,
114            blobs_removed: 0,
115            bytes_freed: 0,
116            removed_blobs: vec![],
117        });
118    }
119
120    let mut read_dir = tokio::fs::read_dir(archives_dir).await?;
121    let mut entries = Vec::new();
122    while let Some(entry) = read_dir.next_entry().await? {
123        entries.push(entry);
124    }
125
126    let mut result = CleanupResult {
127        blobs_checked: entries.len(),
128        blobs_removed: 0,
129        bytes_freed: 0,
130        removed_blobs: vec![],
131    };
132
133    for entry in &entries {
134        let file_name = entry.file_name();
135        let file_name_str = file_name.to_string_lossy().to_string();
136        if file_name_str.starts_with('.') {
137            continue;
138        }
139        let archive_path = archives_dir.join(&file_name_str);
140        let metadata = tokio::fs::metadata(&archive_path).await?;
141        if !metadata.is_file() {
142            continue;
143        }
144        // Strip the .tar.gz suffix to recover the UUID; if it doesn't end
145        // in .tar.gz, treat the entry as orphaned and remove it.
146        let uuid_part = file_name_str
147            .strip_suffix(".tar.gz")
148            .unwrap_or(&file_name_str);
149        if used_uuids.contains(uuid_part) {
150            continue;
151        }
152        result.blobs_removed += 1;
153        result.bytes_freed += metadata.len();
154        result.removed_blobs.push(file_name_str);
155        if !dry_run {
156            tokio::fs::remove_file(&archive_path).await?;
157        }
158    }
159
160    Ok(result)
161}
162
163/// Formats the cleanup result for human-readable output.
164pub fn format_cleanup_result(result: &CleanupResult, dry_run: bool) -> String {
165    if result.blobs_checked == 0 {
166        return "No blobs directory found, nothing to clean up.".to_string();
167    }
168
169    if result.blobs_removed == 0 {
170        return format!(
171            "Checked {} blob(s), all are in use.",
172            result.blobs_checked
173        );
174    }
175
176    let action = if dry_run { "Would remove" } else { "Removed" };
177    let bytes_formatted = format_bytes(result.bytes_freed);
178
179    let mut output = format!(
180        "{} {} unused blob(s) ({} freed)",
181        action, result.blobs_removed, bytes_formatted
182    );
183
184    if dry_run && !result.removed_blobs.is_empty() {
185        output.push_str("\nUnused blobs:");
186        for blob in &result.removed_blobs {
187            output.push_str(&format!("\n  - {}", blob));
188        }
189    }
190
191    output
192}
193
/// Renders a byte count as a human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"500 B"`). Larger values
/// are scaled by powers of 1024 to `KB`, `MB` or `GB` and printed with two
/// decimals. `GB` is the largest unit used.
pub fn format_bytes(bytes: u64) -> String {
    const KB: u64 = 1 << 10;
    const MB: u64 = 1 << 20;
    const GB: u64 = 1 << 30;

    // Pick the unit label and its divisor; plain bytes need no scaling.
    let (unit, scale) = match bytes {
        b if b < KB => return format!("{} B", b),
        b if b < MB => ("KB", KB),
        b if b < GB => ("MB", MB),
        _ => ("GB", GB),
    };

    format!("{:.2} {}", bytes as f64 / scale as f64, unit)
}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::manifest::schema::{PatchFileInfo, PatchManifest, PatchRecord};
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};

    // ── fixtures ───────────────────────────────────────────────────

    const TEST_UUID: &str = "11111111-1111-4111-8111-111111111111";
    const SECOND_UUID: &str = "22222222-2222-4222-8222-222222222222";
    const BEFORE_HASH_1: &str =
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111";
    const AFTER_HASH_1: &str =
        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111";
    const BEFORE_HASH_2: &str =
        "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222";
    const AFTER_HASH_2: &str =
        "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222";
    const ORPHAN_HASH: &str =
        "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo";

    /// Manifest with a single patch (`TEST_UUID`) that touches two files.
    fn create_test_manifest() -> PatchManifest {
        let files = HashMap::from([
            (
                "package/index.js".to_string(),
                PatchFileInfo {
                    before_hash: BEFORE_HASH_1.to_string(),
                    after_hash: AFTER_HASH_1.to_string(),
                },
            ),
            (
                "package/lib/utils.js".to_string(),
                PatchFileInfo {
                    before_hash: BEFORE_HASH_2.to_string(),
                    after_hash: AFTER_HASH_2.to_string(),
                },
            ),
        ]);

        let record = PatchRecord {
            uuid: TEST_UUID.to_string(),
            exported_at: "2024-01-01T00:00:00Z".to_string(),
            files,
            vulnerabilities: HashMap::new(),
            description: "Test patch".to_string(),
            license: "MIT".to_string(),
            tier: "free".to_string(),
        };

        PatchManifest {
            patches: HashMap::from([("pkg:npm/pkg-a@1.0.0".to_string(), record)]),
        }
    }

    /// Creates `<tempdir>/<name>`; the returned guard keeps the tempdir alive.
    async fn scratch_dir(name: &str) -> (tempfile::TempDir, PathBuf) {
        let guard = tempfile::tempdir().unwrap();
        let path = guard.path().join(name);
        tokio::fs::create_dir_all(&path).await.unwrap();
        (guard, path)
    }

    /// Writes `content` to `dir/name`.
    async fn write_file(dir: &Path, name: &str, content: &str) {
        tokio::fs::write(dir.join(name), content).await.unwrap();
    }

    /// Reports whether `dir/name` can be stat'ed.
    async fn exists(dir: &Path, name: &str) -> bool {
        tokio::fs::metadata(dir.join(name)).await.is_ok()
    }

    /// File name of the archive that belongs to `uuid`.
    fn archive_name(uuid: &str) -> String {
        format!("{uuid}.tar.gz")
    }

    // ── cleanup_unused_blobs tests ─────────────────────────────────

    #[tokio::test]
    async fn test_cleanup_keeps_after_hash_removes_orphan() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        for (name, content) in [
            (AFTER_HASH_1, "after content 1"),
            (AFTER_HASH_2, "after content 2"),
            (ORPHAN_HASH, "orphan content"),
        ] {
            write_file(&blobs_dir, name, content).await;
        }

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        // Only the orphan goes away.
        assert_eq!(result.blobs_removed, 1);
        assert!(result.removed_blobs.contains(&ORPHAN_HASH.to_string()));

        assert!(exists(&blobs_dir, AFTER_HASH_1).await);
        assert!(exists(&blobs_dir, AFTER_HASH_2).await);
        assert!(!exists(&blobs_dir, ORPHAN_HASH).await);
    }

    #[tokio::test]
    async fn test_cleanup_removes_before_hash_blobs() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        for (name, content) in [
            (BEFORE_HASH_1, "before content 1"),
            (BEFORE_HASH_2, "before content 2"),
            (AFTER_HASH_1, "after content 1"),
            (AFTER_HASH_2, "after content 2"),
        ] {
            write_file(&blobs_dir, name, content).await;
        }

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        // Both beforeHash blobs are reported as removed...
        assert_eq!(result.blobs_removed, 2);
        assert!(result.removed_blobs.contains(&BEFORE_HASH_1.to_string()));
        assert!(result.removed_blobs.contains(&BEFORE_HASH_2.to_string()));

        // ...the afterHash blobs survive...
        assert!(exists(&blobs_dir, AFTER_HASH_1).await);
        assert!(exists(&blobs_dir, AFTER_HASH_2).await);

        // ...and the beforeHash blobs are really gone from disk.
        assert!(!exists(&blobs_dir, BEFORE_HASH_1).await);
        assert!(!exists(&blobs_dir, BEFORE_HASH_2).await);
    }

    #[tokio::test]
    async fn test_cleanup_dry_run_does_not_delete() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        write_file(&blobs_dir, BEFORE_HASH_1, "before content 1").await;
        write_file(&blobs_dir, AFTER_HASH_1, "after content 1").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, true)
            .await
            .unwrap();

        // The beforeHash blob is reported as would-be-removed...
        assert_eq!(result.blobs_removed, 1);
        assert!(result.removed_blobs.contains(&BEFORE_HASH_1.to_string()));

        // ...but nothing was touched on disk.
        assert!(exists(&blobs_dir, BEFORE_HASH_1).await);
        assert!(exists(&blobs_dir, AFTER_HASH_1).await);
    }

    #[tokio::test]
    async fn test_cleanup_empty_manifest_removes_all() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = PatchManifest::new();

        write_file(&blobs_dir, AFTER_HASH_1, "content 1").await;
        write_file(&blobs_dir, BEFORE_HASH_1, "content 2").await;

        let result = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_removed, 2);
    }

    #[tokio::test]
    async fn test_cleanup_nonexistent_blobs_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("non-existent");
        let manifest = create_test_manifest();

        let result = cleanup_unused_blobs(&manifest, &missing, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_checked, 0);
        assert_eq!(result.blobs_removed, 0);
    }

    // ── formatting tests ───────────────────────────────────────────

    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 7] = [
            (0, "0 B"),
            (500, "500 B"),
            (1023, "1023 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
            (1073741824, "1.00 GB"),
        ];
        for (bytes, expected) in cases {
            assert_eq!(format_bytes(bytes), expected);
        }
    }

    #[test]
    fn test_format_cleanup_result_no_blobs_dir() {
        let result = CleanupResult::default();
        assert_eq!(
            format_cleanup_result(&result, false),
            "No blobs directory found, nothing to clean up."
        );
    }

    #[test]
    fn test_format_cleanup_result_all_in_use() {
        let result = CleanupResult {
            blobs_checked: 5,
            ..Default::default()
        };
        assert_eq!(
            format_cleanup_result(&result, false),
            "Checked 5 blob(s), all are in use."
        );
    }

    #[test]
    fn test_format_cleanup_result_removed() {
        let result = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 2,
            bytes_freed: 2048,
            removed_blobs: vec!["aaa".to_string(), "bbb".to_string()],
        };
        assert_eq!(
            format_cleanup_result(&result, false),
            "Removed 2 unused blob(s) (2.00 KB freed)"
        );
    }

    #[test]
    fn test_format_cleanup_result_dry_run_lists_blobs() {
        let result = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 2,
            bytes_freed: 2048,
            removed_blobs: vec!["aaa".to_string(), "bbb".to_string()],
        };
        let formatted = format_cleanup_result(&result, true);
        assert!(formatted.starts_with("Would remove 2 unused blob(s)"));
        assert!(formatted.contains("Unused blobs:"));
        assert!(formatted.contains("  - aaa"));
        assert!(formatted.contains("  - bbb"));
    }

    // ── cleanup_unused_archives tests ──────────────────────────────

    #[tokio::test]
    async fn test_cleanup_archives_keeps_referenced_uuid() {
        let (_tmp, archives) = scratch_dir("packages").await;
        let manifest = create_test_manifest();

        let kept = archive_name(TEST_UUID);
        let orphan = archive_name(SECOND_UUID);
        write_file(&archives, &kept, "keep").await;
        write_file(&archives, &orphan, "orphan").await;

        let result = cleanup_unused_archives(&manifest, &archives, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_removed, 1);
        assert!(result.removed_blobs.contains(&orphan));
        assert!(exists(&archives, &kept).await);
        assert!(!exists(&archives, &orphan).await);
    }

    #[tokio::test]
    async fn test_cleanup_archives_dry_run_does_not_delete() {
        let (_tmp, archives) = scratch_dir("packages").await;
        let manifest = create_test_manifest();

        let orphan = archive_name(SECOND_UUID);
        write_file(&archives, &orphan, "orphan").await;

        let result = cleanup_unused_archives(&manifest, &archives, true)
            .await
            .unwrap();

        assert_eq!(result.blobs_removed, 1);
        assert!(exists(&archives, &orphan).await);
    }

    #[tokio::test]
    async fn test_cleanup_archives_removes_non_archive_files() {
        // Stray files (no .tar.gz suffix, or wrong UUID) count as orphans,
        // which keeps the directory tidy if the on-disk format ever changes.
        let (_tmp, archives) = scratch_dir("packages").await;
        let manifest = create_test_manifest();

        write_file(&archives, "stray.txt", "junk").await;
        write_file(&archives, &archive_name(TEST_UUID), "keep").await;

        let result = cleanup_unused_archives(&manifest, &archives, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_removed, 1);
        assert!(result.removed_blobs.contains(&"stray.txt".to_string()));
    }

    #[tokio::test]
    async fn test_cleanup_archives_nonexistent_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("does-not-exist");
        let manifest = create_test_manifest();

        let result = cleanup_unused_archives(&manifest, &missing, false)
            .await
            .unwrap();

        assert_eq!(result.blobs_checked, 0);
        assert_eq!(result.blobs_removed, 0);
    }
}