1use std::collections::HashSet;
2use std::path::Path;
3
4use crate::manifest::operations::get_after_hash_blobs;
5use crate::manifest::schema::PatchManifest;
6
/// Statistics produced by a single cleanup pass over a directory.
///
/// In a dry run the "removed" fields describe what *would* have been
/// removed; nothing is deleted from disk.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CleanupResult {
    /// Number of regular, non-hidden files that were examined.
    pub blobs_checked: usize,
    /// Number of examined files that were unused and therefore removed
    /// (or would be removed in a dry run).
    pub blobs_removed: usize,
    /// Total size in bytes of the removed (or to-be-removed) files.
    pub bytes_freed: u64,
    /// File names of the removed (or to-be-removed) files.
    pub removed_blobs: Vec<String>,
}
15
16async fn cleanup_dir<F: Fn(&str) -> bool>(
22 dir: &Path,
23 dry_run: bool,
24 is_used: F,
25) -> Result<CleanupResult, std::io::Error> {
26 if tokio::fs::metadata(dir).await.is_err() {
27 return Ok(CleanupResult::default());
28 }
29
30 let mut read_dir = tokio::fs::read_dir(dir).await?;
31 let mut entries = Vec::new();
32 while let Some(entry) = read_dir.next_entry().await? {
33 entries.push(entry);
34 }
35
36 let mut result = CleanupResult::default();
37
38 for entry in &entries {
39 let file_name_str = entry.file_name().to_string_lossy().to_string();
40 if file_name_str.starts_with('.') {
41 continue;
42 }
43 let path = dir.join(&file_name_str);
44 let metadata = match tokio::fs::symlink_metadata(&path).await {
50 Ok(m) => m,
51 Err(_) => continue,
52 };
53 if !metadata.is_file() {
54 continue;
55 }
56 result.blobs_checked += 1;
58 if is_used(&file_name_str) {
59 continue;
60 }
61 result.blobs_removed += 1;
62 result.bytes_freed += metadata.len();
63 result.removed_blobs.push(file_name_str);
64 if !dry_run {
65 tokio::fs::remove_file(&path).await?;
66 }
67 }
68
69 Ok(result)
70}
71
72pub async fn cleanup_unused_blobs(
81 manifest: &PatchManifest,
82 blobs_dir: &Path,
83 dry_run: bool,
84) -> Result<CleanupResult, std::io::Error> {
85 let used_blobs = get_after_hash_blobs(manifest);
87 cleanup_dir(blobs_dir, dry_run, |name| used_blobs.contains(name)).await
88}
89
90pub async fn cleanup_unused_archives(
100 manifest: &PatchManifest,
101 archives_dir: &Path,
102 dry_run: bool,
103) -> Result<CleanupResult, std::io::Error> {
104 let used_uuids: HashSet<String> = manifest.patches.values().map(|r| r.uuid.clone()).collect();
105 cleanup_dir(archives_dir, dry_run, |name| {
106 let uuid_part = name.strip_suffix(".tar.gz").unwrap_or(name);
109 used_uuids.contains(uuid_part)
110 })
111 .await
112}
113
114pub fn format_cleanup_result(result: &CleanupResult, dry_run: bool) -> String {
116 if result.blobs_checked == 0 {
117 return "No blobs directory found, nothing to clean up.".to_string();
118 }
119
120 if result.blobs_removed == 0 {
121 return format!("Checked {} blob(s), all are in use.", result.blobs_checked);
122 }
123
124 let action = if dry_run { "Would remove" } else { "Removed" };
125 let bytes_formatted = format_bytes(result.bytes_freed);
126
127 let mut output = format!(
128 "{} {} unused blob(s) ({} freed)",
129 action, result.blobs_removed, bytes_formatted
130 );
131
132 if dry_run && !result.removed_blobs.is_empty() {
133 output.push_str("\nUnused blobs:");
134 for blob in &result.removed_blobs {
135 output.push_str(&format!("\n - {}", blob));
136 }
137 }
138
139 output
140}
141
/// Formats a byte count using binary (1024-based) units.
///
/// Values below 1024 are printed as whole bytes (`"500 B"`). Larger values
/// are printed with two decimals in the largest of KB, MB or GB that does
/// not exceed the value (`"1.50 KB"`). GB is the largest unit used.
pub fn format_bytes(bytes: u64) -> String {
    // Units from largest to smallest, so the first match is the right one.
    const UNITS: [(u64, &str); 3] = [(1 << 30, "GB"), (1 << 20, "MB"), (1 << 10, "KB")];

    match UNITS.iter().find(|(threshold, _)| bytes >= *threshold) {
        Some((divisor, unit)) => format!("{:.2} {}", bytes as f64 / *divisor as f64, unit),
        None => format!("{} B", bytes),
    }
}
162
#[cfg(test)]
mod tests {
    use super::*;
    use crate::manifest::schema::{PatchFileInfo, PatchManifest, PatchRecord};
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};

    const TEST_UUID: &str = "11111111-1111-4111-8111-111111111111";
    const SECOND_UUID: &str = "22222222-2222-4222-8222-222222222222";
    const BEFORE_HASH_1: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111";
    const AFTER_HASH_1: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111";
    const BEFORE_HASH_2: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222";
    const AFTER_HASH_2: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222";
    const ORPHAN_HASH: &str = "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo";

    /// Builds a manifest with one patch (`TEST_UUID`) that touches two files.
    fn create_test_manifest() -> PatchManifest {
        let file_info = |before: &str, after: &str| PatchFileInfo {
            before_hash: before.to_string(),
            after_hash: after.to_string(),
        };

        let files = HashMap::from([
            (
                "package/index.js".to_string(),
                file_info(BEFORE_HASH_1, AFTER_HASH_1),
            ),
            (
                "package/lib/utils.js".to_string(),
                file_info(BEFORE_HASH_2, AFTER_HASH_2),
            ),
        ]);

        let record = PatchRecord {
            uuid: TEST_UUID.to_string(),
            exported_at: "2024-01-01T00:00:00Z".to_string(),
            files,
            vulnerabilities: HashMap::new(),
            description: "Test patch".to_string(),
            license: "MIT".to_string(),
            tier: "free".to_string(),
        };

        PatchManifest {
            patches: HashMap::from([("pkg:npm/pkg-a@1.0.0".to_string(), record)]),
        }
    }

    /// Creates `<tempdir>/<name>` and returns it together with the guard
    /// that keeps the temporary directory alive.
    async fn scratch_dir(name: &str) -> (tempfile::TempDir, PathBuf) {
        let guard = tempfile::tempdir().unwrap();
        let path = guard.path().join(name);
        tokio::fs::create_dir_all(&path).await.unwrap();
        (guard, path)
    }

    /// Writes `contents` to `dir/name`.
    async fn put(dir: &Path, name: &str, contents: &[u8]) {
        tokio::fs::write(dir.join(name), contents).await.unwrap();
    }

    /// Reports whether `path` can be stat'ed (following symlinks).
    async fn exists(path: impl AsRef<Path>) -> bool {
        tokio::fs::metadata(path).await.is_ok()
    }

    /// Reports whether `path` can be stat'ed without following symlinks.
    async fn link_exists(path: impl AsRef<Path>) -> bool {
        tokio::fs::symlink_metadata(path).await.is_ok()
    }

    /// Reports whether `name` is listed among the removed blobs.
    fn was_removed(result: &CleanupResult, name: &str) -> bool {
        result.removed_blobs.iter().any(|n| n.as_str() == name)
    }

    #[tokio::test]
    async fn test_cleanup_keeps_after_hash_removes_orphan() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        put(&blobs_dir, AFTER_HASH_1, b"after content 1").await;
        put(&blobs_dir, AFTER_HASH_2, b"after content 2").await;
        put(&blobs_dir, ORPHAN_HASH, b"orphan content").await;

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 1);
        assert!(was_removed(&outcome, ORPHAN_HASH));

        assert!(exists(blobs_dir.join(AFTER_HASH_1)).await);
        assert!(exists(blobs_dir.join(AFTER_HASH_2)).await);
        assert!(!exists(blobs_dir.join(ORPHAN_HASH)).await);
    }

    #[tokio::test]
    async fn test_cleanup_removes_before_hash_blobs() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        put(&blobs_dir, BEFORE_HASH_1, b"before content 1").await;
        put(&blobs_dir, BEFORE_HASH_2, b"before content 2").await;
        put(&blobs_dir, AFTER_HASH_1, b"after content 1").await;
        put(&blobs_dir, AFTER_HASH_2, b"after content 2").await;

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 2);
        assert!(was_removed(&outcome, BEFORE_HASH_1));
        assert!(was_removed(&outcome, BEFORE_HASH_2));

        assert!(exists(blobs_dir.join(AFTER_HASH_1)).await);
        assert!(exists(blobs_dir.join(AFTER_HASH_2)).await);

        assert!(!exists(blobs_dir.join(BEFORE_HASH_1)).await);
        assert!(!exists(blobs_dir.join(BEFORE_HASH_2)).await);
    }

    #[tokio::test]
    async fn test_cleanup_dry_run_does_not_delete() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        put(&blobs_dir, BEFORE_HASH_1, b"before content 1").await;
        put(&blobs_dir, AFTER_HASH_1, b"after content 1").await;

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, true)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 1);
        assert!(was_removed(&outcome, BEFORE_HASH_1));

        // A dry run must leave both files on disk.
        assert!(exists(blobs_dir.join(BEFORE_HASH_1)).await);
        assert!(exists(blobs_dir.join(AFTER_HASH_1)).await);
    }

    #[tokio::test]
    async fn test_cleanup_empty_manifest_removes_all() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = PatchManifest::new();

        put(&blobs_dir, AFTER_HASH_1, b"content 1").await;
        put(&blobs_dir, BEFORE_HASH_1, b"content 2").await;

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 2);
    }

    #[tokio::test]
    async fn test_cleanup_nonexistent_blobs_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("non-existent");
        let manifest = create_test_manifest();

        let outcome = cleanup_unused_blobs(&manifest, &missing, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_checked, 0);
        assert_eq!(outcome.blobs_removed, 0);
    }

    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 7] = [
            (0, "0 B"),
            (500, "500 B"),
            (1023, "1023 B"),
            (1024, "1.00 KB"),
            (1536, "1.50 KB"),
            (1048576, "1.00 MB"),
            (1073741824, "1.00 GB"),
        ];
        for (input, expected) in cases {
            assert_eq!(format_bytes(input), expected);
        }
    }

    #[test]
    fn test_format_cleanup_result_no_blobs_dir() {
        let nothing_checked = CleanupResult::default();
        assert_eq!(
            format_cleanup_result(&nothing_checked, false),
            "No blobs directory found, nothing to clean up."
        );
    }

    #[test]
    fn test_format_cleanup_result_all_in_use() {
        let all_used = CleanupResult {
            blobs_checked: 5,
            ..CleanupResult::default()
        };
        assert_eq!(
            format_cleanup_result(&all_used, false),
            "Checked 5 blob(s), all are in use."
        );
    }

    #[test]
    fn test_format_cleanup_result_removed() {
        let two_removed = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 2,
            bytes_freed: 2048,
            removed_blobs: vec!["aaa".to_string(), "bbb".to_string()],
        };
        assert_eq!(
            format_cleanup_result(&two_removed, false),
            "Removed 2 unused blob(s) (2.00 KB freed)"
        );
    }

    #[tokio::test]
    async fn test_cleanup_archives_keeps_referenced_uuid() {
        let (_tmp, archives) = scratch_dir("packages").await;
        let manifest = create_test_manifest();

        let kept = format!("{TEST_UUID}.tar.gz");
        let orphan = format!("{SECOND_UUID}.tar.gz");
        put(&archives, &kept, b"keep").await;
        put(&archives, &orphan, b"orphan").await;

        let outcome = cleanup_unused_archives(&manifest, &archives, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 1);
        assert!(was_removed(&outcome, &orphan));
        assert!(exists(archives.join(&kept)).await);
        assert!(!exists(archives.join(&orphan)).await);
    }

    #[tokio::test]
    async fn test_cleanup_archives_dry_run_does_not_delete() {
        let (_tmp, archives) = scratch_dir("packages").await;
        let manifest = create_test_manifest();

        let orphan = format!("{SECOND_UUID}.tar.gz");
        put(&archives, &orphan, b"orphan").await;

        let outcome = cleanup_unused_archives(&manifest, &archives, true)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 1);
        assert!(exists(archives.join(&orphan)).await);
    }

    #[tokio::test]
    async fn test_cleanup_archives_removes_non_archive_files() {
        let (_tmp, archives) = scratch_dir("packages").await;
        let manifest = create_test_manifest();

        put(&archives, "stray.txt", b"junk").await;
        put(&archives, &format!("{TEST_UUID}.tar.gz"), b"keep").await;

        let outcome = cleanup_unused_archives(&manifest, &archives, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 1);
        assert!(was_removed(&outcome, "stray.txt"));
    }

    #[tokio::test]
    async fn test_cleanup_archives_nonexistent_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("does-not-exist");
        let manifest = create_test_manifest();

        let outcome = cleanup_unused_archives(&manifest, &missing, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_checked, 0);
        assert_eq!(outcome.blobs_removed, 0);
    }

    #[tokio::test]
    async fn test_cleanup_does_not_count_subdirs_or_hidden_files() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        put(&blobs_dir, AFTER_HASH_1, b"after content 1").await;
        tokio::fs::create_dir_all(blobs_dir.join("subdir"))
            .await
            .unwrap();
        put(&blobs_dir, ".hidden", b"hidden").await;

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        // Only the regular, non-hidden file is counted.
        assert_eq!(outcome.blobs_checked, 1);
        assert_eq!(outcome.blobs_removed, 0);

        assert!(exists(blobs_dir.join("subdir")).await);
        assert!(exists(blobs_dir.join(".hidden")).await);
    }

    #[tokio::test]
    async fn test_cleanup_empty_existing_dir_checks_nothing() {
        let (_tmp, blobs_dir) = scratch_dir("blobs").await;

        let outcome = cleanup_unused_blobs(&create_test_manifest(), &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_checked, 0);
        assert_eq!(outcome.blobs_removed, 0);
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_cleanup_dangling_symlink_does_not_abort() {
        use std::os::unix::fs::symlink;

        let (_tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        put(&blobs_dir, ORPHAN_HASH, b"orphan content").await;
        symlink(
            blobs_dir.join("missing-target"),
            blobs_dir.join("dangling-link"),
        )
        .unwrap();

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_removed, 1);
        assert!(was_removed(&outcome, ORPHAN_HASH));
        assert!(!exists(blobs_dir.join(ORPHAN_HASH)).await);
        // The dangling link itself must be left in place.
        assert!(link_exists(blobs_dir.join("dangling-link")).await);
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_cleanup_does_not_follow_symlink_to_used_target() {
        use std::os::unix::fs::symlink;

        let (tmp, blobs_dir) = scratch_dir("blobs").await;
        let manifest = create_test_manifest();

        let outside = tmp.path().join("outside.bin");
        tokio::fs::write(&outside, vec![0u8; 4096]).await.unwrap();
        symlink(&outside, blobs_dir.join("link-to-outside")).unwrap();

        let outcome = cleanup_unused_blobs(&manifest, &blobs_dir, false)
            .await
            .unwrap();

        assert_eq!(outcome.blobs_checked, 0);
        assert_eq!(outcome.blobs_removed, 0);
        assert_eq!(outcome.bytes_freed, 0);
        assert!(link_exists(blobs_dir.join("link-to-outside")).await);
        assert!(exists(&outside).await);
    }

    #[test]
    fn test_format_cleanup_result_dry_run_lists_blobs() {
        let two_removed = CleanupResult {
            blobs_checked: 5,
            blobs_removed: 2,
            bytes_freed: 2048,
            removed_blobs: vec!["aaa".to_string(), "bbb".to_string()],
        };

        let rendered = format_cleanup_result(&two_removed, true);

        assert!(rendered.starts_with("Would remove 2 unused blob(s)"));
        for fragment in ["Unused blobs:", " - aaa", " - bbb"] {
            assert!(rendered.contains(fragment));
        }
    }
}