1use std::{
6 cmp::Ordering,
7 collections::{BTreeMap, BTreeSet},
8 str::FromStr,
9 sync::{Arc, Mutex},
10};
11
12use bytesize::ByteSize;
13use chrono::{DateTime, Duration, Local};
14use derive_more::Add;
15use derive_setters::Setters;
16use enumset::{EnumSet, EnumSetType};
17use itertools::Itertools;
18use log::{info, warn};
19use rayon::prelude::{IntoParallelIterator, ParallelIterator};
20use serde::{Deserialize, Serialize};
21
22use crate::{
23 backend::{
24 FileType, ReadBackend,
25 decrypt::{DecryptReadBackend, DecryptWriteBackend},
26 node::NodeType,
27 },
28 blob::{
29 BlobId, BlobType, BlobTypeMap, Initialize,
30 packer::{PackSizer, Repacker},
31 tree::TreeStreamerOnce,
32 },
33 error::{ErrorKind, RusticError, RusticResult},
34 index::{
35 GlobalIndex, ReadGlobalIndex, ReadIndex,
36 binarysorted::{IndexCollector, IndexType},
37 indexer::Indexer,
38 },
39 progress::{Progress, ProgressBars},
40 repofile::{
41 HeaderEntry, IndexBlob, IndexFile, IndexPack, SnapshotFile, SnapshotId, indexfile::IndexId,
42 packfile::PackId,
43 },
44 repository::{Open, Repository},
45};
46
/// Constants used by the prune module.
pub(super) mod constants {
    /// Minimum number of blobs an index file should contain; index files with
    /// fewer blobs are rebuilt (combined) during prune, see `filter_index_files`.
    pub(super) const MIN_INDEX_LEN: usize = 10_000;
}
51
/// Options for the `prune` command.
#[allow(clippy::struct_excessive_bools)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
#[derive(Debug, Clone, Setters)]
#[setters(into)]
#[non_exhaustive]
pub struct PruneOptions {
    /// Maximum amount of data to repack: a size, a percentage of the repository size, or "unlimited"
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "LIMIT", default_value = "10%")
    )]
    pub max_repack: LimitOption,

    /// Tolerated limit of unused data: a size, a percentage of the repository size, or "unlimited"
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "LIMIT", default_value = "5%")
    )]
    pub max_unused: LimitOption,

    /// Minimum duration to keep packs before repacking or removing (e.g. "90d")
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "DURATION", default_value = "0d")
    )]
    pub keep_pack: humantime::Duration,

    /// Minimum duration to keep packs which are marked for deletion (e.g. "10m")
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "DURATION", default_value = "23h")
    )]
    pub keep_delete: humantime::Duration,

    /// Delete pack files immediately instead of marking them for deletion first
    #[cfg_attr(feature = "clap", clap(long))]
    pub instant_delete: bool,

    /// Delete the old index files before repacking instead of afterwards
    #[cfg_attr(feature = "clap", clap(long))]
    pub early_delete_index: bool,

    /// Simply copy blobs when repacking (uses `Repacker::add_fast`) instead of
    /// re-processing them
    #[cfg_attr(feature = "clap", clap(long))]
    pub fast_repack: bool,

    /// Repack packs containing uncompressed blobs (requires repo version >= 2)
    #[cfg_attr(feature = "clap", clap(long, conflicts_with = "fast_repack"))]
    pub repack_uncompressed: bool,

    /// Repack all packs
    #[cfg_attr(feature = "clap", clap(long))]
    pub repack_all: bool,

    /// Only repack packs which are cacheable; defaults to true for hot repositories
    #[cfg_attr(feature = "clap", clap(long, value_name = "TRUE/FALSE"))]
    pub repack_cacheable_only: Option<bool>,

    /// Do not repack packs which only need resizing
    #[cfg_attr(feature = "clap", clap(long))]
    pub no_resize: bool,

    /// Snapshots to ignore when determining which blobs are used
    #[cfg_attr(feature = "clap", clap(skip))]
    pub ignore_snaps: Vec<SnapshotId>,
}
136
137impl Default for PruneOptions {
138 fn default() -> Self {
139 Self {
140 max_repack: LimitOption::Percentage(10),
141 max_unused: LimitOption::Percentage(5),
142 keep_pack: std::time::Duration::from_secs(0).into(),
143 keep_delete: std::time::Duration::from_secs(82800).into(), instant_delete: false,
145 early_delete_index: false,
146 fast_repack: false,
147 repack_uncompressed: false,
148 repack_all: false,
149 repack_cacheable_only: None,
150 no_resize: false,
151 ignore_snaps: Vec::new(),
152 }
153 }
154}
155
impl PruneOptions {
    /// Compute a [`PrunePlan`] for the given repository using these options.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`PrunePlan::from_prune_options`].
    #[deprecated(
        since = "0.5.2",
        note = "Use `PrunePlan::from_prune_options()` instead"
    )]
    pub fn get_plan<P: ProgressBars, S: Open>(
        &self,
        repo: &Repository<P, S>,
    ) -> RusticResult<PrunePlan> {
        PrunePlan::from_prune_options(repo, self)
    }
}
183
/// A limit which can be an absolute size, a percentage, or unlimited.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum LimitOption {
    /// An absolute size limit.
    Size(ByteSize),
    /// A percentage limit (value in percent).
    Percentage(u64),
    /// No limit.
    Unlimited,
}
195
196impl FromStr for LimitOption {
197 type Err = Box<RusticError>;
198 fn from_str(s: &str) -> Result<Self, Self::Err> {
199 Ok(match s.chars().last().unwrap_or('0') {
200 '%' => Self::Percentage({
201 let mut copy = s.to_string();
202 _ = copy.pop();
203 copy.parse().map_err(|err| {
204 RusticError::with_source(
205 ErrorKind::InvalidInput,
206 "Failed to parse percentage limit `{limit}`",
207 err,
208 )
209 .attach_context("limit", s)
210 })?
211 }),
212 'd' if s == "unlimited" => Self::Unlimited,
213 _ => {
214 let byte_size = ByteSize::from_str(s).map_err(|err| {
215 RusticError::with_source(
216 ErrorKind::InvalidInput,
217 "Failed to parse size limit `{limit}`",
218 err,
219 )
220 .attach_context("limit", s)
221 })?;
222
223 Self::Size(byte_size)
224 }
225 })
226 }
227}
228
/// Status flags collected per pack while deciding its fate; used for the
/// debug statistics buckets in [`DebugStats`].
#[derive(EnumSetType, Debug, PartialOrd, Ord, Serialize, Deserialize)]
#[enumset(serialize_repr = "list")]
pub enum PackStatus {
    /// The pack contains blobs which are not compressed.
    NotCompressed,
    /// The pack is younger than the `keep_pack` duration.
    TooYoung,
    /// The pack is marked for deletion but carries no timestamp.
    TimeNotSet,
    /// The pack is larger than the targeted pack size.
    TooLarge,
    /// The pack is smaller than the targeted pack size.
    TooSmall,
    /// The pack contains unused blobs.
    HasUnusedBlobs,
    /// The pack contains used blobs.
    HasUsedBlobs,
    /// The pack is marked for deletion.
    Marked,
}
241
/// Aggregated counters for one debug-statistics bucket.
#[derive(Debug, Clone, Copy, Serialize)]
pub struct DebugDetailedStats {
    /// Number of packs in this bucket.
    pub packs: u64,
    /// Number of unused blobs.
    pub unused_blobs: u64,
    /// Total size of unused blobs.
    pub unused_size: u64,
    /// Number of used blobs.
    pub used_blobs: u64,
    /// Total size of used blobs.
    pub used_size: u64,
}
250
/// Key identifying a debug-statistics bucket: decision, blob type, status flags.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct DebugStatsKey {
    /// The decision made for the packs in this bucket.
    pub todo: PackToDo,
    /// The blob type of the packs in this bucket.
    pub blob_type: BlobType,
    /// The status flags shared by the packs in this bucket.
    pub status: EnumSet<PackStatus>,
}
257
/// Debug statistics of a prune run, bucketed by [`DebugStatsKey`].
#[derive(Debug, Default, Serialize)]
pub struct DebugStats(pub BTreeMap<DebugStatsKey, DebugDetailedStats>);
260
261impl DebugStats {
262 fn add(&mut self, pi: &PackInfo, todo: PackToDo, status: EnumSet<PackStatus>) {
263 let blob_type = pi.blob_type;
264 let details = self
265 .0
266 .entry(DebugStatsKey {
267 todo,
268 blob_type,
269 status,
270 })
271 .or_insert(DebugDetailedStats {
272 packs: 0,
273 unused_blobs: 0,
274 unused_size: 0,
275 used_blobs: 0,
276 used_size: 0,
277 });
278 details.packs += 1;
279 details.unused_blobs += u64::from(pi.unused_blobs);
280 details.unused_size += u64::from(pi.unused_size);
281 details.used_blobs += u64::from(pi.used_blobs);
282 details.used_size += u64::from(pi.used_size);
283 }
284}
285
/// Statistics about packs which are marked for deletion.
#[derive(Default, Debug, Clone, Copy)]
pub struct DeleteStats {
    /// Packs which will be removed.
    pub remove: u64,
    /// Packs which will be recovered (they contain used blobs again).
    pub recover: u64,
    /// Packs which will stay marked for deletion.
    pub keep: u64,
}
296
297impl DeleteStats {
298 pub const fn total(&self) -> u64 {
300 self.remove + self.recover + self.keep
301 }
302}
/// Statistics about the packs of the repository.
#[derive(Debug, Default, Clone, Copy)]
pub struct PackStats {
    /// Packs containing only used blobs.
    pub used: u64,
    /// Packs containing both used and unused blobs.
    pub partly_used: u64,
    /// Packs containing no used blobs.
    pub unused: u64,
    /// Packs which will be repacked.
    pub repack: u64,
    /// Packs which will be kept.
    pub keep: u64,
}
317
/// Blob-count or byte-size statistics, depending on usage context.
#[derive(Debug, Default, Clone, Copy, Add)]
pub struct SizeStats {
    /// Used blobs/bytes.
    pub used: u64,
    /// Unused blobs/bytes.
    pub unused: u64,
    /// Blobs/bytes which will be removed.
    pub remove: u64,
    /// Blobs/bytes which will be repacked.
    pub repack: u64,
    /// Blobs/bytes which will be removed by repacking.
    pub repackrm: u64,
}
332
impl SizeStats {
    /// Total count/size (used + unused).
    pub const fn total(&self) -> u64 {
        self.used + self.unused
    }

    /// Expected total count/size after the prune has run.
    pub const fn total_after_prune(&self) -> u64 {
        self.used + self.unused_after_prune()
    }

    /// Expected unused count/size after the prune has run.
    // NOTE(review): relies on the invariant `remove + repackrm <= unused`
    // established by the bookkeeping in `set_todo`; would underflow otherwise.
    pub const fn unused_after_prune(&self) -> u64 {
        self.unused - self.remove - self.repackrm
    }
}
349
/// Statistics about a [`PrunePlan`].
#[derive(Default, Debug)]
pub struct PruneStats {
    /// Counts of packs marked for deletion.
    pub packs_to_delete: DeleteStats,
    /// Sizes of packs marked for deletion.
    pub size_to_delete: DeleteStats,
    /// Statistics about all packs.
    pub packs: PackStats,
    /// Blob-count statistics, per blob type.
    pub blobs: BlobTypeMap<SizeStats>,
    /// Blob-size statistics, per blob type.
    pub size: BlobTypeMap<SizeStats>,
    /// Number of pack files not referenced by any index.
    pub packs_unref: u64,
    /// Total size of unreferenced pack files.
    pub size_unref: u64,
    /// Number of index files read.
    pub index_files: u64,
    /// Number of index files which will be rebuilt.
    pub index_files_rebuild: u64,
    /// Debug statistics, bucketed by decision/type/status.
    pub debug: DebugStats,
}
374
375impl PruneStats {
376 #[must_use]
378 pub fn blobs_sum(&self) -> SizeStats {
379 self.blobs
380 .values()
381 .fold(SizeStats::default(), |acc, x| acc + *x)
382 }
383
384 #[must_use]
386 pub fn size_sum(&self) -> SizeStats {
387 self.size
388 .values()
389 .fold(SizeStats::default(), |acc, x| acc + *x)
390 }
391}
392
/// An index file together with the packs it references, as tracked while pruning.
#[derive(Debug)]
struct PruneIndex {
    /// The id of the index file.
    id: IndexId,
    /// Whether the index file must be rewritten.
    modified: bool,
    /// The packs referenced by this index file.
    packs: Vec<PrunePack>,
}
403
404impl PruneIndex {
405 fn len(&self) -> usize {
407 self.packs.iter().map(|p| p.blobs.len()).sum()
408 }
409}
410
/// The decision of what to do with a pack.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Default)]
pub enum PackToDo {
    /// No decision has been made yet.
    #[default]
    Undecided,
    /// Keep the pack unchanged.
    Keep,
    /// Repack the pack (its needed blobs are moved into new packs).
    Repack,
    /// Mark the pack for deletion.
    MarkDelete,
    /// Keep the existing deletion mark as-is.
    KeepMarked,
    /// Keep the deletion mark but set/correct its timestamp.
    KeepMarkedAndCorrect,
    /// Remove the deletion mark (the pack contains used blobs again).
    Recover,
    /// Delete the pack file.
    Delete,
}
432
/// A pack as processed by the prune logic.
#[derive(Debug)]
struct PrunePack {
    /// The pack id.
    id: PackId,
    /// The type of blobs in this pack.
    blob_type: BlobType,
    /// The size of the pack file.
    size: u32,
    /// Whether the pack is marked for deletion.
    delete_mark: bool,
    /// The decision what to do with this pack.
    to_do: PackToDo,
    /// The time associated with the pack (e.g. when it was marked for deletion).
    time: Option<DateTime<Local>>,
    /// The blobs contained in this pack.
    blobs: Vec<IndexBlob>,
}
451
impl PrunePack {
    /// Create a `PrunePack` from an `IndexPack` with the given deletion mark;
    /// the decision starts out as `Undecided`.
    fn from_index_pack(p: IndexPack, delete_mark: bool) -> Self {
        Self {
            id: p.id,
            blob_type: p.blob_type(),
            size: p.pack_size(),
            delete_mark,
            to_do: PackToDo::Undecided,
            time: p.time,
            blobs: p.blobs,
        }
    }

    /// Create an unmarked `PrunePack` (from the `packs` section of an index file).
    fn from_index_pack_unmarked(p: IndexPack) -> Self {
        Self::from_index_pack(p, false)
    }

    /// Create a marked `PrunePack` (from the `packs_to_delete` section of an index file).
    fn from_index_pack_marked(p: IndexPack) -> Self {
        Self::from_index_pack(p, true)
    }

    /// Convert back into an `IndexPack`; the size is intentionally not written.
    fn into_index_pack(self) -> IndexPack {
        IndexPack {
            id: self.id,
            time: self.time,
            size: None,
            blobs: self.blobs,
        }
    }

    /// Convert back into an `IndexPack` with the given time set.
    fn into_index_pack_with_time(self, time: DateTime<Local>) -> IndexPack {
        IndexPack {
            id: self.id,
            time: Some(time),
            size: None,
            blobs: self.blobs,
        }
    }

    /// Record the decision `todo` for this pack and update all plan statistics
    /// accordingly (both the debug buckets and the per-type blob/size counters).
    ///
    /// # Panics
    ///
    /// Panics if `todo` is `PackToDo::Undecided`.
    #[allow(clippy::similar_names)]
    fn set_todo(
        &mut self,
        todo: PackToDo,
        pi: &PackInfo,
        status: EnumSet<PackStatus>,
        stats: &mut PruneStats,
    ) {
        let tpe = self.blob_type;
        stats.debug.add(pi, todo, status);
        match todo {
            // a real decision must have been made before calling this
            PackToDo::Undecided => panic!("not possible"),
            PackToDo::Keep => {
                stats.packs.keep += 1;
            }
            PackToDo::Repack => {
                // all blobs are read; the unused ones are dropped by repacking
                stats.packs.repack += 1;
                stats.blobs[tpe].repack += u64::from(pi.unused_blobs + pi.used_blobs);
                stats.blobs[tpe].repackrm += u64::from(pi.unused_blobs);
                stats.size[tpe].repack += u64::from(pi.unused_size + pi.used_size);
                stats.size[tpe].repackrm += u64::from(pi.unused_size);
            }

            PackToDo::MarkDelete => {
                stats.blobs[tpe].remove += u64::from(pi.unused_blobs);
                stats.size[tpe].remove += u64::from(pi.unused_size);
            }
            PackToDo::Recover => {
                stats.packs_to_delete.recover += 1;
                stats.size_to_delete.recover += u64::from(self.size);
            }
            PackToDo::Delete => {
                stats.packs_to_delete.remove += 1;
                stats.size_to_delete.remove += u64::from(self.size);
            }
            PackToDo::KeepMarked | PackToDo::KeepMarkedAndCorrect => {
                stats.packs_to_delete.keep += 1;
                stats.size_to_delete.keep += u64::from(self.size);
            }
        }
        self.to_do = todo;
    }

    /// Returns whether all blobs of this pack are compressed.
    fn is_compressed(&self) -> bool {
        self.blobs
            .iter()
            .all(|blob| blob.uncompressed_length.is_some())
    }
}
570
/// Why a pack was selected as a repack candidate.
#[derive(PartialEq, Eq, Debug)]
enum RepackReason {
    /// The pack is only partly used.
    PartlyUsed,
    /// The pack contains blobs which should be compressed.
    ToCompress,
    /// The pack size does not match the targeted pack size.
    SizeMismatch,
}
581
/// A plan of what a prune run will do.
#[derive(Debug)]
pub struct PrunePlan {
    /// The time the plan was computed.
    time: DateTime<Local>,
    /// Reference counters for all blobs used by snapshots.
    used_ids: BTreeMap<BlobId, u8>,
    /// Pack files present in the repository (id -> size); entries are removed
    /// while checking, so only unreferenced packs remain afterwards.
    existing_packs: BTreeMap<PackId, u32>,
    /// Repack candidates: (pack info, status flags, reason, index number, pack number).
    repack_candidates: Vec<(PackInfo, EnumSet<PackStatus>, RepackReason, usize, usize)>,
    /// The index files with the packs they reference.
    index_files: Vec<PruneIndex>,
    /// Statistics about this plan.
    pub stats: PruneStats,
}
598
impl PrunePlan {
    /// Build a new `PrunePlan` from index contents, deduplicating packs which
    /// occur in more than one index file.
    fn new(
        used_ids: BTreeMap<BlobId, u8>,
        existing_packs: BTreeMap<PackId, u32>,
        index_files: Vec<(IndexId, IndexFile)>,
    ) -> Self {
        let mut processed_packs = BTreeSet::new();
        let mut processed_packs_delete = BTreeSet::new();
        let mut index_files: Vec<_> = index_files
            .into_iter()
            .map(|(id, index)| {
                let mut modified = false;
                // Regular packs: keep only the first occurrence of each pack id;
                // dropping a duplicate means the index file must be rewritten.
                let mut packs: Vec<_> = index
                    .packs
                    .into_iter()
                    .filter(|p| {
                        let no_duplicate = processed_packs.insert(p.id);
                        modified |= !no_duplicate;
                        no_duplicate
                    })
                    .map(PrunePack::from_index_pack_unmarked)
                    .collect();
                // Deletion-marked packs: same deduplication, tracked separately.
                packs.extend(
                    index
                        .packs_to_delete
                        .into_iter()
                        .filter(|p| {
                            let no_duplicate = processed_packs_delete.insert(p.id);
                            modified |= !no_duplicate;
                            no_duplicate
                        })
                        .map(PrunePack::from_index_pack_marked),
                );

                PruneIndex {
                    id,
                    modified,
                    packs,
                }
            })
            .collect();

        // A pack present both unmarked and marked for deletion was re-added:
        // drop the deletion-marked copy and flag the index file as modified.
        for index in &mut index_files {
            let mut modified = false;
            index.packs.retain(|p| {
                !p.delete_mark || {
                    let duplicate = processed_packs.contains(&p.id);
                    modified |= duplicate;
                    !duplicate
                }
            });

            index.modified |= modified;
        }

        Self {
            time: Local::now(),
            used_ids,
            existing_packs,
            repack_candidates: Vec::new(),
            index_files,
            stats: PruneStats::default(),
        }
    }
673
    /// Compute a [`PrunePlan`] for the given repository using `opts`.
    ///
    /// Reads all index files and snapshots, determines the used blobs, and
    /// decides for every pack whether to keep, repack, or (mark-)delete it.
    ///
    /// # Errors
    ///
    /// Returns an error if `repack_uncompressed` is requested on a repository
    /// with version < 2, or if any index/snapshot/backend operation fails.
    pub fn from_prune_options<P: ProgressBars, S: Open>(
        repo: &Repository<P, S>,
        opts: &PruneOptions,
    ) -> RusticResult<Self> {
        let pb = &repo.pb;
        let be = repo.dbe();

        let version = repo.config().version;

        // compression (and hence repacking uncompressed packs) needs repo version >= 2
        if version < 2 && opts.repack_uncompressed {
            return Err(RusticError::new(
                ErrorKind::Unsupported,
                "Repacking uncompressed pack is unsupported in Repository version `{config_version}`.",
            )
            .attach_context("config_version", version.to_string()));
        }

        let mut index_files = Vec::new();

        let p = pb.progress_counter("reading index...");
        let mut index_collector = IndexCollector::new(IndexType::OnlyTrees);

        for index in be.stream_all::<IndexFile>(&p)? {
            let (id, index) = index?;
            index_collector.extend(index.packs.clone());
            index_collector.extend(index.packs_to_delete.clone());

            index_files.push((id, index));
        }
        p.finish();

        // Use the collected index to size the repo and walk all snapshots
        // for the set of used blob ids.
        let (used_ids, total_size) = {
            let index = GlobalIndex::new_from_index(index_collector.into_index());
            let total_size = BlobTypeMap::init(|blob_type| index.total_size(blob_type));
            let used_ids = find_used_blobs(be, &index, &opts.ignore_snaps, pb)?;
            (used_ids, total_size)
        };

        let p = pb.progress_spinner("getting packs from repository...");
        let existing_packs: BTreeMap<_, _> = be
            .list_with_size(FileType::Pack)?
            .into_iter()
            .map(|(id, size)| (PackId::from(id), size))
            .collect();
        p.finish();

        let mut pruner = Self::new(used_ids, existing_packs, index_files);
        pruner.count_used_blobs();
        pruner.check()?;
        // For hot repositories, default to repacking only cacheable packs.
        let repack_cacheable_only = opts
            .repack_cacheable_only
            .unwrap_or_else(|| repo.config().is_hot == Some(true));
        let pack_sizer =
            total_size.map(|tpe, size| PackSizer::from_config(repo.config(), tpe, size));

        // NOTE(review): the error messages below are misleading — the failing
        // conversion is *from* std::time::Duration *to* chrono::Duration.
        pruner.decide_packs(
            Duration::from_std(*opts.keep_pack).map_err(|err| {
                RusticError::with_source(
                    ErrorKind::Internal,
                    "Failed to convert keep_pack duration `{keep_pack}` to std::time::Duration.",
                    err,
                )
                .attach_context("keep_pack", opts.keep_pack.to_string())
            })?,
            Duration::from_std(*opts.keep_delete).map_err(|err| {
                RusticError::with_source(
                    ErrorKind::Internal,
                    "Failed to convert keep_delete duration `{keep_delete}` to std::time::Duration.",
                    err,
                )
                .attach_context("keep_delete", opts.keep_delete.to_string())
            })?,
            repack_cacheable_only,
            opts.repack_uncompressed,
            opts.repack_all,
            &pack_sizer,
        )?;

        pruner.decide_repack(
            &opts.max_repack,
            &opts.max_unused,
            opts.repack_uncompressed || opts.repack_all,
            opts.no_resize,
            &pack_sizer,
        );

        pruner.check_existing_packs()?;
        pruner.filter_index_files(opts.instant_delete);

        Ok(pruner)
    }
784
785 fn count_used_blobs(&mut self) {
787 for blob in self
788 .index_files
789 .iter()
790 .flat_map(|index| &index.packs)
791 .flat_map(|pack| &pack.blobs)
792 {
793 if let Some(count) = self.used_ids.get_mut(&blob.id) {
794 *count = count.saturating_add(1);
798 }
799 }
800 }
801
802 fn check(&self) -> RusticResult<()> {
808 for (id, count) in &self.used_ids {
809 if *count == 0 {
810 return Err(RusticError::new(
811 ErrorKind::Internal,
812 "Blob ID `{blob_id}` is missing in index files.",
813 )
814 .attach_context("blob_id", id.to_string())
815 .ask_report());
816 }
817 }
818
819 Ok(())
820 }
821
    /// Decide for each pack whether to keep, repack, or (mark-)delete it.
    ///
    /// Packs which *may* be repacked are only collected as candidates here;
    /// the final repack decision is made in [`Self::decide_repack`].
    /// Deletion-marked packs are processed first (`mark_case == true`).
    #[allow(clippy::too_many_lines)]
    #[allow(clippy::unnecessary_wraps)]
    fn decide_packs(
        &mut self,
        keep_pack: Duration,
        keep_delete: Duration,
        repack_cacheable_only: bool,
        repack_uncompressed: bool,
        repack_all: bool,
        pack_sizer: &BlobTypeMap<PackSizer>,
    ) -> RusticResult<()> {
        for mark_case in [true, false] {
            for (index_num, index) in self.index_files.iter_mut().enumerate() {
                for (pack_num, pack) in index
                    .packs
                    .iter_mut()
                    .enumerate()
                    .filter(|(_, p)| p.delete_mark == mark_case)
                {
                    // PackInfo::from_pack also decrements the blob reference
                    // counters, assigning blobs to the packs that supply them.
                    let pi = PackInfo::from_pack(pack, &mut self.used_ids);
                    // global per-type blob/size statistics
                    self.stats.blobs[pi.blob_type].used += u64::from(pi.used_blobs);
                    self.stats.blobs[pi.blob_type].unused += u64::from(pi.unused_blobs);
                    self.stats.size[pi.blob_type].used += u64::from(pi.used_size);
                    self.stats.size[pi.blob_type].unused += u64::from(pi.unused_size);
                    let mut status = EnumSet::empty();

                    // packs newer than keep_pack are neither repacked nor removed
                    let too_young = pack.time > Some(self.time - keep_pack);
                    if too_young && !pack.delete_mark {
                        _ = status.insert(PackStatus::TooYoung);
                    }
                    let keep_uncacheable = repack_cacheable_only && !pack.blob_type.is_cacheable();

                    let to_compress = repack_uncompressed && !pack.is_compressed();
                    if to_compress {
                        _ = status.insert(PackStatus::NotCompressed);
                    }
                    let size_mismatch = !pack_sizer[pack.blob_type].size_ok(pack.size);
                    if pack_sizer[pack.blob_type].is_too_small(pack.size) {
                        _ = status.insert(PackStatus::TooSmall);
                    }
                    if pack_sizer[pack.blob_type].is_too_large(pack.size) {
                        _ = status.insert(PackStatus::TooLarge);
                    }
                    match (pack.delete_mark, pi.used_blobs, pi.unused_blobs) {
                        (false, 0, _) => {
                            // unmarked pack without used blobs => (mark for) deletion
                            self.stats.packs.unused += 1;
                            _ = status.insert(PackStatus::HasUnusedBlobs);
                            if too_young {
                                // unlikely case: unused pack which is too young to remove
                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
                            } else {
                                pack.set_todo(PackToDo::MarkDelete, &pi, status, &mut self.stats);
                            }
                        }
                        (false, 1.., 0) => {
                            // unmarked pack containing only used blobs
                            self.stats.packs.used += 1;
                            _ = status.insert(PackStatus::HasUsedBlobs);
                            if too_young || keep_uncacheable {
                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
                            } else if to_compress || repack_all {
                                self.repack_candidates.push((
                                    pi,
                                    status,
                                    RepackReason::ToCompress,
                                    index_num,
                                    pack_num,
                                ));
                            } else if size_mismatch {
                                self.repack_candidates.push((
                                    pi,
                                    status,
                                    RepackReason::SizeMismatch,
                                    index_num,
                                    pack_num,
                                ));
                            } else {
                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
                            }
                        }

                        (false, 1.., 1..) => {
                            // unmarked pack with used and unused blobs => repack candidate
                            self.stats.packs.partly_used += 1;
                            status
                                .insert_all(PackStatus::HasUsedBlobs | PackStatus::HasUnusedBlobs);

                            if too_young || keep_uncacheable {
                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
                            } else {
                                self.repack_candidates.push((
                                    pi,
                                    status,
                                    RepackReason::PartlyUsed,
                                    index_num,
                                    pack_num,
                                ));
                            }
                        }
                        (true, 0, _) => {
                            // deletion-marked pack without used blobs
                            _ = status.insert(PackStatus::Marked);
                            match pack.time {
                                // delete packs which have been marked for longer than keep_delete
                                Some(local_date_time)
                                    if self.time - local_date_time >= keep_delete =>
                                {
                                    // NOTE(review): inserting `TooYoung` here looks
                                    // inverted — this branch fires when the pack is
                                    // *old* enough to delete. Debug-stats flag only;
                                    // confirm against upstream before changing.
                                    _ = status.insert(PackStatus::TooYoung);
                                    pack.set_todo(PackToDo::Delete, &pi, status, &mut self.stats);
                                }
                                None => {
                                    warn!(
                                        "pack to delete {}: no time set, this should not happen! Keeping this pack.",
                                        pack.id
                                    );
                                    _ = status.insert(PackStatus::TimeNotSet);
                                    pack.set_todo(
                                        PackToDo::KeepMarkedAndCorrect,
                                        &pi,
                                        status,
                                        &mut self.stats,
                                    );
                                }
                                Some(_) => pack.set_todo(
                                    PackToDo::KeepMarked,
                                    &pi,
                                    status,
                                    &mut self.stats,
                                ),
                            }
                        }
                        (true, 1.., _) => {
                            // deletion-marked pack which contains used blobs => recover it
                            status.insert_all(PackStatus::Marked | PackStatus::HasUsedBlobs);
                            pack.set_todo(PackToDo::Recover, &pi, status, &mut self.stats);
                        }
                    }
                }
            }
        }
        Ok(())
    }
986
987 fn decide_repack(
1001 &mut self,
1002 max_repack: &LimitOption,
1003 max_unused: &LimitOption,
1004 repack_uncompressed: bool,
1005 no_resize: bool,
1006 pack_sizer: &BlobTypeMap<PackSizer>,
1007 ) {
1008 let max_unused = match (repack_uncompressed, max_unused) {
1009 (true, _) => 0,
1010 (false, LimitOption::Unlimited) => u64::MAX,
1011 (false, LimitOption::Size(size)) => size.as_u64(),
1012 (false, LimitOption::Percentage(p)) => (p * self.stats.size_sum().used) / (100 - p),
1016 };
1017
1018 let max_repack = match max_repack {
1019 LimitOption::Unlimited => u64::MAX,
1020 LimitOption::Size(size) => size.as_u64(),
1021 LimitOption::Percentage(p) => (p * self.stats.size_sum().total()) / 100,
1022 };
1023
1024 self.repack_candidates.sort_unstable_by_key(|rc| rc.0);
1025 let mut resize_packs = BlobTypeMap::<Vec<_>>::default();
1026 let mut do_repack = BlobTypeMap::default();
1027 let mut repack_size = BlobTypeMap::<u64>::default();
1028
1029 for (pi, status, repack_reason, index_num, pack_num) in
1030 std::mem::take(&mut self.repack_candidates)
1031 {
1032 let pack = &mut self.index_files[index_num].packs[pack_num];
1033 let blob_type = pi.blob_type;
1034
1035 let total_repack_size: u64 = repack_size.into_values().sum();
1036 if total_repack_size + u64::from(pi.used_size) >= max_repack
1037 || (self.stats.size_sum().unused_after_prune() < max_unused
1038 && repack_reason == RepackReason::PartlyUsed
1039 && blob_type == BlobType::Data)
1040 || (repack_reason == RepackReason::SizeMismatch && no_resize)
1041 {
1042 pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
1043 } else if repack_reason == RepackReason::SizeMismatch {
1044 resize_packs[blob_type].push((pi, status, index_num, pack_num));
1045 repack_size[blob_type] += u64::from(pi.used_size);
1046 } else {
1047 pack.set_todo(PackToDo::Repack, &pi, status, &mut self.stats);
1048 repack_size[blob_type] += u64::from(pi.used_size);
1049 do_repack[blob_type] = true;
1050 }
1051 }
1052 for (blob_type, resize_packs) in resize_packs {
1053 let todo = if do_repack[blob_type]
1056 || repack_size[blob_type] > u64::from(pack_sizer[blob_type].pack_size())
1057 {
1058 PackToDo::Repack
1059 } else {
1060 PackToDo::Keep
1061 };
1062 for (pi, status, index_num, pack_num) in resize_packs {
1063 let pack = &mut self.index_files[index_num].packs[pack_num];
1064 pack.set_todo(todo, &pi, status, &mut self.stats);
1065 }
1066 }
1067 }
1068
    /// Cross-check the planned packs against the pack files actually present
    /// in the repository.
    ///
    /// Every referenced pack is removed from `self.existing_packs`; whatever
    /// remains afterwards is unreferenced and counted in the statistics.
    ///
    /// # Errors
    ///
    /// Returns an error if a needed pack is missing or has an unexpected size,
    /// or if any pack is still `Undecided`.
    fn check_existing_packs(&mut self) -> RusticResult<()> {
        for pack in self.index_files.iter().flat_map(|index| &index.packs) {
            // remove from the map; leftovers at the end are unreferenced packs
            let existing_size = self.existing_packs.remove(&pack.id);

            // lazily evaluated size check, only applied to packs we rely on
            let check_size = || {
                match existing_size {
                    Some(size) if size == pack.size => Ok(()),
                    Some(size) => Err(RusticError::new(
                        ErrorKind::Internal,
                        "Pack size `{size_in_pack_real}` of id `{pack_id}` does not match the expected size `{size_in_index_expected}` in the index file. ",
                    )
                    .attach_context("pack_id", pack.id.to_string())
                    .attach_context("size_in_index_expected", pack.size.to_string())
                    .attach_context("size_in_pack_real", size.to_string())
                    .ask_report()),
                    None => Err(RusticError::new(ErrorKind::Internal, "Pack `{pack_id}` does not exist.")
                        .attach_context("pack_id", pack.id.to_string())
                        .ask_report()),
                }
            };

            match pack.to_do {
                PackToDo::Undecided => {
                    return Err(RusticError::new(
                        ErrorKind::Internal,
                        "Pack `{pack_id}` got no decision what to do with it!",
                    )
                    .attach_context("pack_id", pack.id.to_string())
                    .ask_report());
                }
                PackToDo::Keep | PackToDo::Recover => {
                    // blobs in kept/recovered packs are supplied by those packs,
                    // so they no longer need to be tracked
                    for blob in &pack.blobs {
                        _ = self.used_ids.remove(&blob.id);
                    }
                    check_size()?;
                }
                PackToDo::Repack => {
                    check_size()?;
                }
                PackToDo::MarkDelete
                | PackToDo::Delete
                | PackToDo::KeepMarked
                | PackToDo::KeepMarkedAndCorrect => {}
            }
        }

        // whatever is left is not referenced by any index file
        for size in self.existing_packs.values() {
            self.stats.size_unref += u64::from(*size);
        }
        self.stats.packs_unref = self.existing_packs.len() as u64;

        Ok(())
    }
1131
    /// Keep only the index files which will be rebuilt; all others are dropped
    /// from the plan (and hence left untouched in the repository).
    fn filter_index_files(&mut self, instant_delete: bool) {
        let mut any_must_modify = false;
        self.stats.index_files = self.index_files.len() as u64;
        self.index_files.retain(|index| {
            // An index file must be rewritten if it was modified during
            // deduplication or if any contained pack gets a new state.
            // Without instant delete, packs staying `KeepMarked` need no rewrite.
            let must_modify = index.modified
                || index.packs.iter().any(|p| {
                    p.to_do != PackToDo::Keep && (instant_delete || p.to_do != PackToDo::KeepMarked)
                });

            any_must_modify |= must_modify;

            // also rebuild small index files so they get combined into larger ones
            must_modify || index.len() < constants::MIN_INDEX_LEN
        });

        if !any_must_modify && self.index_files.len() == 1 {
            // only one small index file remains and nothing needs modification
            // => rebuilding would be pointless
            self.index_files.clear();
        }

        self.stats.index_files_rebuild = self.index_files.len() as u64;
    }
1165
1166 #[must_use]
1168 pub fn repack_packs(&self) -> Vec<PackId> {
1169 self.index_files
1170 .iter()
1171 .flat_map(|index| &index.packs)
1172 .filter(|pack| pack.to_do == PackToDo::Repack)
1173 .map(|pack| pack.id)
1174 .collect()
1175 }
1176
    /// Execute this plan, pruning the repository.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Repository::prune`].
    #[allow(clippy::significant_drop_tightening)]
    #[allow(clippy::too_many_lines)]
    #[deprecated(since = "0.5.2", note = "Use `Repository::prune()` instead.")]
    pub fn do_prune<P: ProgressBars, S: Open>(
        self,
        repo: &Repository<P, S>,
        opts: &PruneOptions,
    ) -> RusticResult<()> {
        repo.prune(opts, self)
    }
}
1207
/// Execute `prune_plan` on the given repository.
///
/// Unindexed packs are removed or marked, repack candidates are repacked (in
/// parallel), the index is rebuilt, and obsolete index/pack files are deleted.
///
/// # Errors
///
/// Returns an error if the repository is append-only, if any pack is still
/// `Undecided`, or if a backend/repack/index operation fails.
#[allow(clippy::significant_drop_tightening)]
#[allow(clippy::too_many_lines)]
pub(crate) fn prune_repository<P: ProgressBars, S: Open>(
    repo: &Repository<P, S>,
    opts: &PruneOptions,
    prune_plan: PrunePlan,
) -> RusticResult<()> {
    if repo.config().append_only == Some(true) {
        return Err(RusticError::new(
            ErrorKind::AppendOnly,
            "Pruning is not allowed in append-only repositories. Please disable append-only mode first, if you know what you are doing. Aborting.",
        ));
    }
    // warm up the pack files which will be read for repacking
    repo.warm_up_wait(prune_plan.repack_packs().into_iter())?;
    let be = repo.dbe();
    let pb = &repo.pb;

    let indexer = Indexer::new_unindexed(be.clone()).into_shared();

    // Estimated total size per blob type after pruning, used to size the
    // repackers; includes the compressed index header entry per blob.
    let size_after_prune = BlobTypeMap::init(|blob_type| {
        prune_plan.stats.size[blob_type].total_after_prune()
            + prune_plan.stats.blobs[blob_type].total_after_prune()
                * u64::from(HeaderEntry::ENTRY_LEN_COMPRESSED)
    });

    let tree_repacker = Repacker::new(
        be.clone(),
        BlobType::Tree,
        indexer.clone(),
        repo.config(),
        size_after_prune[BlobType::Tree],
    )?;

    let data_repacker = Repacker::new(
        be.clone(),
        BlobType::Data,
        indexer.clone(),
        repo.config(),
        size_after_prune[BlobType::Data],
    )?;

    // Handle pack files not referenced by any index file: delete them right
    // away or add them as deletion-marked entries to the new index.
    if !prune_plan.existing_packs.is_empty() {
        if opts.instant_delete {
            let p = pb.progress_counter("removing unindexed packs...");
            let existing_packs: Vec<_> = prune_plan.existing_packs.into_keys().collect();
            be.delete_list(true, existing_packs.iter(), p)?;
        } else {
            let p = pb.progress_counter("marking unneeded unindexed pack files for deletion...");
            p.set_length(prune_plan.existing_packs.len().try_into().unwrap());
            for (id, size) in prune_plan.existing_packs {
                let pack = IndexPack {
                    id,
                    size: Some(size),
                    time: Some(Local::now()),
                    blobs: Vec::new(),
                };
                indexer.write().unwrap().add_remove(pack)?;
                p.inc(1);
            }
            p.finish();
        }
    }

    // choose a progress style depending on whether there is anything to do
    let p = match (
        prune_plan.index_files.is_empty(),
        prune_plan.stats.packs.repack > 0,
    ) {
        (true, _) => {
            info!("nothing to do!");
            pb.progress_hidden()
        }
        (false, true) => pb.progress_bytes("repacking // rebuilding index..."),
        (false, false) => pb.progress_spinner("rebuilding index..."),
    };

    // total bytes to repack, minus the blobs which are dropped while repacking
    p.set_length(prune_plan.stats.size_sum().repack - prune_plan.stats.size_sum().repackrm);

    let mut indexes_remove = Vec::new();
    let tree_packs_remove = Arc::new(Mutex::new(Vec::new()));
    let data_packs_remove = Arc::new(Mutex::new(Vec::new()));

    // queue a pack for deletion in the list matching its blob type
    let delete_pack = |pack: PrunePack| {
        match pack.blob_type {
            BlobType::Data => data_packs_remove.lock().unwrap().push(pack.id),
            BlobType::Tree => tree_packs_remove.lock().unwrap().push(pack.id),
        }
    };

    // shared across the parallel workers below
    let used_ids = Arc::new(Mutex::new(prune_plan.used_ids));

    // all index files in the plan will be removed; collect their ids and
    // flatten their packs for processing
    let packs: Vec<_> = prune_plan
        .index_files
        .into_iter()
        .inspect(|index| {
            indexes_remove.push(index.id);
        })
        .flat_map(|index| index.packs)
        .collect();

    if !indexes_remove.is_empty() && opts.early_delete_index {
        let p = pb.progress_counter("removing old index files...");
        be.delete_list(true, indexes_remove.iter(), p)?;
    }

    // process all packs in parallel
    packs
        .into_par_iter()
        .try_for_each(|pack| -> RusticResult<_> {
            match pack.to_do {
                PackToDo::Undecided => {
                    return Err(RusticError::new(
                        ErrorKind::Internal,
                        "Pack `{pack_id}` got no decision what to do with it!",
                    )
                    .attach_context("pack_id", pack.id.to_string())
                    .ask_report());
                }
                PackToDo::Keep => {
                    // keep: re-add the pack unchanged to the new index
                    let pack = pack.into_index_pack();
                    indexer.write().unwrap().add(pack)?;
                }
                PackToDo::Repack => {
                    // repack: move the still-needed blobs into new packs
                    for blob in &pack.blobs {
                        // skip blobs already repacked from another pack
                        if used_ids.lock().unwrap().remove(&blob.id).is_none() {
                            continue;
                        }

                        let repacker = match blob.tpe {
                            BlobType::Data => &data_repacker,
                            BlobType::Tree => &tree_repacker,
                        };
                        if opts.fast_repack {
                            repacker.add_fast(&pack.id, blob)?;
                        } else {
                            repacker.add(&pack.id, blob)?;
                        }
                        p.inc(u64::from(blob.length));
                    }
                    if opts.instant_delete {
                        delete_pack(pack);
                    } else {
                        // mark the old pack for deletion, stamped with the plan time
                        let pack = pack.into_index_pack_with_time(prune_plan.time);
                        indexer.write().unwrap().add_remove(pack)?;
                    }
                }
                PackToDo::MarkDelete => {
                    if opts.instant_delete {
                        delete_pack(pack);
                    } else {
                        let pack = pack.into_index_pack_with_time(prune_plan.time);
                        indexer.write().unwrap().add_remove(pack)?;
                    }
                }
                PackToDo::KeepMarked | PackToDo::KeepMarkedAndCorrect => {
                    if opts.instant_delete {
                        delete_pack(pack);
                    } else {
                        // keep the original mark time if set, else correct it
                        let time = pack.time.unwrap_or(prune_plan.time);
                        let pack = pack.into_index_pack_with_time(time);
                        indexer.write().unwrap().add_remove(pack)?;
                    }
                }
                PackToDo::Recover => {
                    // recover: re-add as a regular pack, stamped with the plan time
                    let pack = pack.into_index_pack_with_time(prune_plan.time);
                    indexer.write().unwrap().add(pack)?;
                }
                PackToDo::Delete => delete_pack(pack),
            }
            Ok(())
        })?;
    _ = tree_repacker.finalize()?;
    _ = data_repacker.finalize()?;
    indexer.write().unwrap().finalize()?;
    p.finish();

    if !indexes_remove.is_empty() && !opts.early_delete_index {
        let p = pb.progress_counter("removing old index files...");
        be.delete_list(true, indexes_remove.iter(), p)?;
    }

    // finally, physically delete the packs queued for removal
    let data_packs_remove = data_packs_remove.lock().unwrap();
    if !data_packs_remove.is_empty() {
        let p = pb.progress_counter("removing old data packs...");
        be.delete_list(false, data_packs_remove.iter(), p)?;
    }

    let tree_packs_remove = tree_packs_remove.lock().unwrap();
    if !tree_packs_remove.is_empty() {
        let p = pb.progress_counter("removing old tree packs...");
        be.delete_list(true, tree_packs_remove.iter(), p)?;
    }

    Ok(())
}
1448
/// Used/unused statistics of a single pack.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
struct PackInfo {
    /// The type of blobs in the pack.
    blob_type: BlobType,
    /// Number of used blobs.
    used_blobs: u16,
    /// Number of unused blobs.
    unused_blobs: u16,
    /// Total size of used blobs.
    used_size: u32,
    /// Total size of unused blobs.
    unused_size: u32,
}
1463
impl PartialOrd<Self> for PackInfo {
    /// Delegates to the total order defined by [`Ord`].
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
1469
impl Ord for PackInfo {
    /// Order by blob type first; within a type, compare the unused/used size
    /// ratio via cross-multiplication (avoids division and rounding).
    /// Packs with a *higher* unused-to-used ratio compare as smaller and thus
    /// sort first, making them the most worthwhile repack candidates.
    fn cmp(&self, other: &Self) -> Ordering {
        self.blob_type.cmp(&other.blob_type).then(
            (u64::from(other.unused_size) * u64::from(self.used_size))
                .cmp(&(u64::from(self.unused_size) * u64::from(other.used_size))),
        )
    }
}
1482
impl PackInfo {
    /// Gather used/unused statistics for `pack`, decrementing the reference
    /// counters in `used_ids` for every blob this pack is chosen to supply.
    fn from_pack(pack: &PrunePack, used_ids: &mut BTreeMap<BlobId, u8>) -> Self {
        let mut pi = Self {
            blob_type: pack.blob_type,
            used_blobs: 0,
            unused_blobs: 0,
            used_size: 0,
            unused_size: 0,
        };

        // Decrement each blob's counter. `first_needed` is the position of the
        // first blob whose counter hits 0 here, i.e. the first blob that *must*
        // come from this pack (no later pack references it). Blobs with
        // remaining references are tentatively counted as unused, since a
        // not-yet-processed pack may supply them instead.
        let first_needed = pack.blobs.iter().position(|blob| {
            match used_ids.get_mut(&blob.id) {
                None | Some(0) => {
                    pi.unused_size += blob.length;
                    pi.unused_blobs += 1;
                }
                Some(count) => {
                    *count -= 1;
                    if *count == 0 {
                        pi.used_size += blob.length;
                        pi.used_blobs += 1;
                        return true;
                    }
                    pi.unused_size += blob.length;
                    pi.unused_blobs += 1;
                }
            }
            false
        });

        if let Some(first_needed) = first_needed {
            // This pack must be read anyway, so let it supply *all* of its
            // still-referenced blobs:
            // re-classify the blobs before `first_needed` from unused to used...
            for blob in &pack.blobs[..first_needed] {
                match used_ids.get_mut(&blob.id) {
                    None | Some(0) => {}
                    Some(count) => {
                        pi.unused_size -= blob.length;
                        pi.unused_blobs -= 1;
                        pi.used_size += blob.length;
                        pi.used_blobs += 1;
                        // zero the counter so no other pack supplies this blob
                        *count = 0;
                    }
                }
            }
            // ...and count the referenced blobs after `first_needed` as used too.
            for blob in &pack.blobs[first_needed + 1..] {
                match used_ids.get_mut(&blob.id) {
                    None | Some(0) => {
                        pi.unused_size += blob.length;
                        pi.unused_blobs += 1;
                    }
                    Some(count) => {
                        pi.used_size += blob.length;
                        pi.used_blobs += 1;
                        *count = 0;
                    }
                }
            }
        }

        pi
    }
}
1564
/// Collect the ids of all blobs referenced by the snapshots of the repository.
///
/// Snapshots listed in `ignore_snaps` are skipped. The returned map holds each
/// referenced blob id with a reference counter initialized to 0; the counters
/// are filled later by `PrunePlan::count_used_blobs`.
///
/// # Errors
///
/// Returns an error if listing or streaming snapshots/trees fails.
fn find_used_blobs(
    be: &impl DecryptReadBackend,
    index: &impl ReadGlobalIndex,
    ignore_snaps: &[SnapshotId],
    pb: &impl ProgressBars,
) -> RusticResult<BTreeMap<BlobId, u8>> {
    let ignore_snaps: BTreeSet<_> = ignore_snaps.iter().collect();

    let p = pb.progress_counter("reading snapshots...");
    let list: Vec<_> = be
        .list(FileType::Snapshot)?
        .into_iter()
        .map(SnapshotId::from)
        .filter(|id| !ignore_snaps.contains(&id))
        .collect();
    let snap_trees: Vec<_> = be
        .stream_list::<SnapshotFile>(&list, &p)?
        .into_iter()
        .map_ok(|(_, snap)| snap.tree)
        .try_collect()?;
    p.finish();

    // start with the root tree of every snapshot
    let mut ids: BTreeMap<_, _> = snap_trees
        .iter()
        .map(|id| (BlobId::from(**id), 0))
        .collect();
    let p = pb.progress_counter("finding used blobs...");

    // walk all trees (each visited once) and record every referenced blob
    let mut tree_streamer = TreeStreamerOnce::new(be, index, snap_trees, p)?;
    while let Some(item) = tree_streamer.next().transpose()? {
        let (_, tree) = item;
        for node in tree.nodes {
            match node.node_type {
                NodeType::File => {
                    // file contents reference data blobs
                    ids.extend(
                        node.content
                            .iter()
                            .flatten()
                            .map(|id| (BlobId::from(**id), 0)),
                    );
                }
                NodeType::Dir => {
                    // NOTE(review): `unwrap` assumes every dir node has its
                    // subtree set — confirm this invariant holds for all trees
                    _ = ids.insert(BlobId::from(*node.subtree.unwrap()), 0);
                }
                // other node types (e.g. symlinks) reference no blobs
                _ => {}
            }
        }
    }

    Ok(ids)
}