// rustic_core/commands/prune.rs

1//! `prune` subcommand
2
3/// App-local prelude includes `app_reader()`/`app_writer()`/`app_config()`
4/// accessors along with logging macros. Customize as you see fit.
5use std::{
6    cmp::Ordering,
7    collections::{BTreeMap, BTreeSet},
8    str::FromStr,
9    sync::{Arc, Mutex},
10};
11
12use bytesize::ByteSize;
13use chrono::{DateTime, Duration, Local};
14use derive_more::Add;
15use derive_setters::Setters;
16use enumset::{EnumSet, EnumSetType};
17use itertools::Itertools;
18use log::{info, warn};
19use rayon::prelude::{IntoParallelIterator, ParallelIterator};
20use serde::{Deserialize, Serialize};
21
22use crate::{
23    backend::{
24        FileType, ReadBackend,
25        decrypt::{DecryptReadBackend, DecryptWriteBackend},
26        node::NodeType,
27    },
28    blob::{
29        BlobId, BlobType, BlobTypeMap, Initialize,
30        packer::{PackSizer, Repacker},
31        tree::TreeStreamerOnce,
32    },
33    error::{ErrorKind, RusticError, RusticResult},
34    index::{
35        GlobalIndex, ReadGlobalIndex, ReadIndex,
36        binarysorted::{IndexCollector, IndexType},
37        indexer::Indexer,
38    },
39    progress::{Progress, ProgressBars},
40    repofile::{
41        HeaderEntry, IndexBlob, IndexFile, IndexPack, SnapshotFile, SnapshotId, indexfile::IndexId,
42        packfile::PackId,
43    },
44    repository::{Open, Repository},
45};
46
pub(super) mod constants {
    /// Minimum size of an index file to be considered for pruning
    // NOTE(review): the unit is not visible in this chunk — presumably a count of
    // index entries rather than bytes; confirm at the usage site before relying on it.
    pub(super) const MIN_INDEX_LEN: usize = 10_000;
}
51
#[allow(clippy::struct_excessive_bools)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
#[derive(Debug, Clone, Setters)]
#[setters(into)]
#[non_exhaustive]
/// Options for the `prune` command
///
/// The `clap` default values declared below are mirrored by the manual
/// [`Default`] implementation; keep both in sync when changing a default.
pub struct PruneOptions {
    /// Define maximum data to repack in % of reposize or as size (e.g. '5b', '2 kB', '3M', '4TiB') or 'unlimited'
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "LIMIT", default_value = "10%")
    )]
    pub max_repack: LimitOption,

    /// Tolerate limit of unused data in % of reposize after pruning or as size (e.g. '5b', '2 kB', '3M', '4TiB') or 'unlimited'
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "LIMIT", default_value = "5%")
    )]
    pub max_unused: LimitOption,

    /// Minimum duration (e.g. 90d) to keep packs before repacking or removing. More recently created
    /// packs won't be repacked or marked for deletion within this prune run.
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "DURATION", default_value = "0d")
    )]
    pub keep_pack: humantime::Duration,

    /// Minimum duration (e.g. 10m) to keep packs marked for deletion. More recently marked packs won't be
    /// deleted within this prune run.
    #[cfg_attr(
        feature = "clap",
        clap(long, value_name = "DURATION", default_value = "23h")
    )]
    pub keep_delete: humantime::Duration,

    /// Delete files immediately instead of marking them. This also removes all files already marked for deletion.
    ///
    /// # Warning
    ///
    /// * Only use if you are sure the repository is not accessed by parallel processes!
    #[cfg_attr(feature = "clap", clap(long))]
    pub instant_delete: bool,

    /// Delete index files early. This allows to run prune if there is few or no space left.
    ///
    /// # Warning
    ///
    /// * If prune aborts, this can lead to a (partly) missing index which must be repaired!
    #[cfg_attr(feature = "clap", clap(long))]
    pub early_delete_index: bool,

    /// Simply copy blobs when repacking instead of decrypting; possibly compressing; encrypting
    #[cfg_attr(feature = "clap", clap(long))]
    pub fast_repack: bool,

    /// Repack packs containing uncompressed blobs. This cannot be used with --fast-repack.
    /// Implies --max-unused=0.
    #[cfg_attr(feature = "clap", clap(long, conflicts_with = "fast_repack"))]
    pub repack_uncompressed: bool,

    /// Repack all packs. Implies --max-unused=0.
    #[cfg_attr(feature = "clap", clap(long))]
    pub repack_all: bool,

    /// Only repack packs which are cacheable [default: true for a hot/cold repository, else false]
    // `None` means "derive from the repository config" (see `from_prune_options`).
    #[cfg_attr(feature = "clap", clap(long, value_name = "TRUE/FALSE"))]
    pub repack_cacheable_only: Option<bool>,

    /// Do not repack packs which only needs to be resized
    #[cfg_attr(feature = "clap", clap(long))]
    pub no_resize: bool,

    #[cfg_attr(feature = "clap", clap(skip))]
    /// Ignore these snapshots when looking for data-still-in-use.
    ///
    /// # Warning
    ///
    /// * Use this option with care!
    /// * If you specify snapshots which are not deleted, running the resulting `PrunePlan`
    ///   will remove data which is used within those snapshots!
    pub ignore_snaps: Vec<SnapshotId>,
}
136
137impl Default for PruneOptions {
138    fn default() -> Self {
139        Self {
140            max_repack: LimitOption::Percentage(10),
141            max_unused: LimitOption::Percentage(5),
142            keep_pack: std::time::Duration::from_secs(0).into(),
143            keep_delete: std::time::Duration::from_secs(82800).into(), // = 23h
144            instant_delete: false,
145            early_delete_index: false,
146            fast_repack: false,
147            repack_uncompressed: false,
148            repack_all: false,
149            repack_cacheable_only: None,
150            no_resize: false,
151            ignore_snaps: Vec::new(),
152        }
153    }
154}
155
impl PruneOptions {
    /// Get a `PrunePlan` from the given `PruneOptions`.
    ///
    /// This is a thin, deprecated wrapper which simply delegates to
    /// [`PrunePlan::from_prune_options`].
    ///
    /// # Type Parameters
    ///
    /// * `P` - The progress bar type.
    /// * `S` - The state the repository is in.
    ///
    /// # Arguments
    ///
    /// * `repo` - The repository to get the `PrunePlan` for.
    ///
    /// # Errors
    ///
    /// * If `repack_uncompressed` is set and the repository is a version 1 repository
    /// * If `keep_pack` or `keep_delete` is out of range
    #[deprecated(
        since = "0.5.2",
        note = "Use `PrunePlan::from_prune_options()` instead"
    )]
    pub fn get_plan<P: ProgressBars, S: Open>(
        &self,
        repo: &Repository<P, S>,
    ) -> RusticResult<PrunePlan> {
        PrunePlan::from_prune_options(repo, self)
    }
}
183
/// Enum to specify a size limit
///
/// Parsed from strings like `"10%"`, `"2 kB"` or `"unlimited"` via its
/// [`FromStr`] implementation.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum LimitOption {
    /// Size in bytes
    Size(ByteSize),
    /// Size in percentage of repository size
    Percentage(u64),
    /// No limit
    Unlimited,
}
195
196impl FromStr for LimitOption {
197    type Err = Box<RusticError>;
198    fn from_str(s: &str) -> Result<Self, Self::Err> {
199        Ok(match s.chars().last().unwrap_or('0') {
200            '%' => Self::Percentage({
201                let mut copy = s.to_string();
202                _ = copy.pop();
203                copy.parse().map_err(|err| {
204                    RusticError::with_source(
205                        ErrorKind::InvalidInput,
206                        "Failed to parse percentage limit `{limit}`",
207                        err,
208                    )
209                    .attach_context("limit", s)
210                })?
211            }),
212            'd' if s == "unlimited" => Self::Unlimited,
213            _ => {
214                let byte_size = ByteSize::from_str(s).map_err(|err| {
215                    RusticError::with_source(
216                        ErrorKind::InvalidInput,
217                        "Failed to parse size limit `{limit}`",
218                        err,
219                    )
220                    .attach_context("limit", s)
221                })?;
222
223                Self::Size(byte_size)
224            }
225        })
226    }
227}
228
/// Status flags attached to a pack while deciding what to do with it.
#[derive(EnumSetType, Debug, PartialOrd, Ord, Serialize, Deserialize)]
#[enumset(serialize_repr = "list")]
pub enum PackStatus {
    /// The pack contains blobs which are not compressed
    NotCompressed,
    /// The pack is younger than the `keep-pack` duration
    TooYoung,
    /// The pack has no creation time set
    // NOTE(review): set outside this chunk — confirm the exact condition there.
    TimeNotSet,
    /// The pack is larger than tolerated by the pack sizer
    TooLarge,
    /// The pack is smaller than tolerated by the pack sizer
    TooSmall,
    /// The pack contains unused blobs
    HasUnusedBlobs,
    /// The pack contains used blobs
    HasUsedBlobs,
    /// The pack is marked for deletion
    // NOTE(review): set outside this chunk — confirm where it is attached.
    Marked,
}
241
/// Aggregated counters for one [`DebugStatsKey`] group (see [`DebugStats::add`]).
#[derive(Debug, Clone, Copy, Serialize)]
pub struct DebugDetailedStats {
    /// Number of packs counted under this key
    pub packs: u64,
    /// Total number of unused blobs in these packs
    pub unused_blobs: u64,
    /// Total size of unused blobs in these packs
    pub unused_size: u64,
    /// Total number of used blobs in these packs
    pub used_blobs: u64,
    /// Total size of used blobs in these packs
    pub used_size: u64,
}
250
/// Grouping key for debug statistics: decision, blob type and status flags of a pack.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct DebugStatsKey {
    /// The decision made for the pack
    pub todo: PackToDo,
    /// The type of blobs within the pack
    pub blob_type: BlobType,
    /// The status flags attached to the pack
    pub status: EnumSet<PackStatus>,
}
257
/// Detailed per-decision pack statistics for debugging, grouped by [`DebugStatsKey`].
#[derive(Debug, Default, Serialize)]
pub struct DebugStats(pub BTreeMap<DebugStatsKey, DebugDetailedStats>);
260
261impl DebugStats {
262    fn add(&mut self, pi: &PackInfo, todo: PackToDo, status: EnumSet<PackStatus>) {
263        let blob_type = pi.blob_type;
264        let details = self
265            .0
266            .entry(DebugStatsKey {
267                todo,
268                blob_type,
269                status,
270            })
271            .or_insert(DebugDetailedStats {
272                packs: 0,
273                unused_blobs: 0,
274                unused_size: 0,
275                used_blobs: 0,
276                used_size: 0,
277            });
278        details.packs += 1;
279        details.unused_blobs += u64::from(pi.unused_blobs);
280        details.unused_size += u64::from(pi.unused_size);
281        details.used_blobs += u64::from(pi.used_blobs);
282        details.used_size += u64::from(pi.used_size);
283    }
284}
285
/// Statistics about what is deleted or kept within `prune`
///
/// Used both for counting packs (`PruneStats::packs_to_delete`) and for
/// summing their sizes in bytes (`PruneStats::size_to_delete`); see
/// `PrunePack::set_todo`, which increments both in parallel.
#[derive(Default, Debug, Clone, Copy)]
pub struct DeleteStats {
    /// Packs (or bytes) to remove
    pub remove: u64,
    /// Packs (or bytes) to recover
    pub recover: u64,
    /// Packs (or bytes) to keep
    pub keep: u64,
}
296
impl DeleteStats {
    /// Returns the sum of all three counters (`remove + recover + keep`).
    pub const fn total(&self) -> u64 {
        self.remove + self.recover + self.keep
    }
}
#[derive(Debug, Default, Clone, Copy)]
/// Statistics about packs within `prune`
pub struct PackStats {
    /// Number of used packs (all blobs in the pack are used)
    pub used: u64,
    /// Number of partly used packs (the pack has both used and unused blobs)
    pub partly_used: u64,
    /// Number of unused packs
    pub unused: u64, // this equals to packs-to-remove
    /// Number of packs-to-repack
    pub repack: u64,
    /// Number of packs-to-keep
    pub keep: u64,
}
317
#[derive(Debug, Default, Clone, Copy, Add)]
/// Statistics about sizes within `prune`
///
/// Used both for blob counts (`PruneStats::blobs`) and for total byte sizes
/// (`PruneStats::size`); the field docs below describe the count usage.
pub struct SizeStats {
    /// Number of used blobs
    pub used: u64,
    /// Number of unused blobs
    pub unused: u64,
    /// Number of blobs to remove
    pub remove: u64,
    /// Number of blobs to repack
    pub repack: u64,
    /// Number of blobs to remove after repacking
    pub repackrm: u64,
}
332
impl SizeStats {
    /// Returns the total number of blobs
    pub const fn total(&self) -> u64 {
        self.used + self.unused
    }

    /// Returns the total number of blobs after pruning
    pub const fn total_after_prune(&self) -> u64 {
        self.used + self.unused_after_prune()
    }

    /// Returns the total number of unused blobs after pruning
    ///
    /// Relies on the invariant `unused >= remove + repackrm`; the subtraction
    /// would underflow (panicking in debug builds) otherwise.
    pub const fn unused_after_prune(&self) -> u64 {
        self.unused - self.remove - self.repackrm
    }
}
349
/// Statistics about a [`PrunePlan`]
#[derive(Default, Debug)]
pub struct PruneStats {
    /// Statistics about the number of packs to delete
    pub packs_to_delete: DeleteStats,
    /// Statistics about the sizes (in bytes) of packs to delete
    pub size_to_delete: DeleteStats,
    /// Statistics about current pack situation
    pub packs: PackStats,
    /// Statistics about blob counts in the repository, per blob type
    pub blobs: BlobTypeMap<SizeStats>,
    /// Statistics about total sizes of blobs in the repository, per blob type
    pub size: BlobTypeMap<SizeStats>,
    /// Number of unreferenced pack files
    pub packs_unref: u64,
    /// Total size of unreferenced pack files
    pub size_unref: u64,
    /// Number of index files
    pub index_files: u64,
    /// Number of index files which will be rebuilt during the prune
    pub index_files_rebuild: u64,
    /// Detailed debug statistics
    pub debug: DebugStats,
}
374
375impl PruneStats {
376    /// Compute statistics about blobs of all types
377    #[must_use]
378    pub fn blobs_sum(&self) -> SizeStats {
379        self.blobs
380            .values()
381            .fold(SizeStats::default(), |acc, x| acc + *x)
382    }
383
384    /// Compute total size statistics for blobs of all types
385    #[must_use]
386    pub fn size_sum(&self) -> SizeStats {
387        self.size
388            .values()
389            .fold(SizeStats::default(), |acc, x| acc + *x)
390    }
391}
392
/// An index file together with the packs it references, as tracked by a `PrunePlan`.
#[derive(Debug)]
struct PruneIndex {
    /// The id of the index file
    id: IndexId,
    /// Whether the index file was modified, e.g. because duplicate packs were filtered out
    modified: bool,
    /// The packs in the index file
    packs: Vec<PrunePack>,
}
403
impl PruneIndex {
    /// Total number of blobs over all packs of this index file.
    fn len(&self) -> usize {
        self.packs.iter().map(|p| p.blobs.len()).sum()
    }
}
410
/// Task to be executed by a `PrunePlan` on Packs
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Default)]
pub enum PackToDo {
    /// No decision has been made yet; this is the initial state of every pack
    /// (see `PrunePack::from_index_pack`) and must be resolved before `set_todo`.
    #[default]
    Undecided,
    /// The pack should be kept
    Keep,
    /// The pack should be repacked
    Repack,
    /// The pack should be marked for deletion
    MarkDelete,
    /// The pack stays marked for deletion
    // NOTE(review): counted identically to `KeepMarkedAndCorrect` in `set_todo`;
    // the exact distinction is made outside this chunk — confirm there.
    KeepMarked,
    /// The pack stays marked for deletion, with a correction applied
    // NOTE(review): see note on `KeepMarked`.
    KeepMarkedAndCorrect,
    /// The pack should be recovered
    Recover,
    /// The pack should be deleted
    Delete,
}
432
/// A pack which is to be pruned
#[derive(Debug)]
struct PrunePack {
    /// The id of the pack
    id: PackId,
    /// The type of the pack
    blob_type: BlobType,
    /// The size of the pack (presumably bytes; taken from `IndexPack::pack_size`)
    size: u32,
    /// Whether the pack is marked for deletion
    delete_mark: bool,
    /// The task to be executed on the pack (starts as `Undecided`)
    to_do: PackToDo,
    /// The time the pack was created
    time: Option<DateTime<Local>>,
    /// The blobs in the pack
    blobs: Vec<IndexBlob>,
}
451
452impl PrunePack {
453    /// Create a new `PrunePack` from an `IndexPack`
454    ///
455    /// # Arguments
456    ///
457    /// * `p` - The `IndexPack` to create the `PrunePack` from
458    /// * `delete_mark` - Whether the pack is marked for deletion
459    fn from_index_pack(p: IndexPack, delete_mark: bool) -> Self {
460        Self {
461            id: p.id,
462            blob_type: p.blob_type(),
463            size: p.pack_size(),
464            delete_mark,
465            to_do: PackToDo::Undecided,
466            time: p.time,
467            blobs: p.blobs,
468        }
469    }
470
471    /// Create a new `PrunePack` from an `IndexPack` which is not marked for deletion
472    ///
473    /// # Arguments
474    ///
475    /// * `p` - The `IndexPack` to create the `PrunePack` from
476    fn from_index_pack_unmarked(p: IndexPack) -> Self {
477        Self::from_index_pack(p, false)
478    }
479
480    /// Create a new `PrunePack` from an `IndexPack` which is marked for deletion
481    ///
482    /// # Arguments
483    ///
484    /// * `p` - The `IndexPack` to create the `PrunePack` from
485    fn from_index_pack_marked(p: IndexPack) -> Self {
486        Self::from_index_pack(p, true)
487    }
488
489    /// Convert the `PrunePack` into an `IndexPack`
490    fn into_index_pack(self) -> IndexPack {
491        IndexPack {
492            id: self.id,
493            time: self.time,
494            size: None,
495            blobs: self.blobs,
496        }
497    }
498
499    /// Convert the `PrunePack` into an `IndexPack` with the given time
500    ///
501    /// # Arguments
502    ///
503    /// * `time` - The time to set
504    fn into_index_pack_with_time(self, time: DateTime<Local>) -> IndexPack {
505        IndexPack {
506            id: self.id,
507            time: Some(time),
508            size: None,
509            blobs: self.blobs,
510        }
511    }
512
513    /// Set the task to be executed on the pack
514    ///
515    /// # Arguments
516    ///
517    /// * `todo` - The task to be executed on the pack
518    /// * `pi` - The `PackInfo` of the pack
519    /// * `stats` - The `PruneStats` of the `PrunePlan`
520    #[allow(clippy::similar_names)]
521    fn set_todo(
522        &mut self,
523        todo: PackToDo,
524        pi: &PackInfo,
525        status: EnumSet<PackStatus>,
526        stats: &mut PruneStats,
527    ) {
528        let tpe = self.blob_type;
529        stats.debug.add(pi, todo, status);
530        match todo {
531            PackToDo::Undecided => panic!("not possible"),
532            PackToDo::Keep => {
533                stats.packs.keep += 1;
534            }
535            PackToDo::Repack => {
536                stats.packs.repack += 1;
537                stats.blobs[tpe].repack += u64::from(pi.unused_blobs + pi.used_blobs);
538                stats.blobs[tpe].repackrm += u64::from(pi.unused_blobs);
539                stats.size[tpe].repack += u64::from(pi.unused_size + pi.used_size);
540                stats.size[tpe].repackrm += u64::from(pi.unused_size);
541            }
542
543            PackToDo::MarkDelete => {
544                stats.blobs[tpe].remove += u64::from(pi.unused_blobs);
545                stats.size[tpe].remove += u64::from(pi.unused_size);
546            }
547            PackToDo::Recover => {
548                stats.packs_to_delete.recover += 1;
549                stats.size_to_delete.recover += u64::from(self.size);
550            }
551            PackToDo::Delete => {
552                stats.packs_to_delete.remove += 1;
553                stats.size_to_delete.remove += u64::from(self.size);
554            }
555            PackToDo::KeepMarked | PackToDo::KeepMarkedAndCorrect => {
556                stats.packs_to_delete.keep += 1;
557                stats.size_to_delete.keep += u64::from(self.size);
558            }
559        }
560        self.to_do = todo;
561    }
562
563    /// Returns whether the pack is compressed
564    fn is_compressed(&self) -> bool {
565        self.blobs
566            .iter()
567            .all(|blob| blob.uncompressed_length.is_some())
568    }
569}
570
/// Reasons why a pack should be repacked
///
/// Carried alongside each entry of `PrunePlan::repack_candidates`.
#[derive(PartialEq, Eq, Debug)]
enum RepackReason {
    /// The pack is partly used
    PartlyUsed,
    /// The pack contains uncompressed blobs and is to be compressed
    ToCompress,
    /// The pack's size does not match the targeted pack size
    SizeMismatch,
}
581
/// A plan what should be repacked or removed by a `prune` run
#[derive(Debug)]
pub struct PrunePlan {
    /// The time the plan was created
    time: DateTime<Local>,
    /// The ids of the blobs which are used, mapped to their usage count
    /// (counts saturate at 255, see `count_used_blobs`)
    used_ids: BTreeMap<BlobId, u8>,
    /// The ids of the existing pack files together with their file sizes
    existing_packs: BTreeMap<PackId, u32>,
    /// The packs which should be repacked, as
    /// (pack info, status flags, repack reason, index number, pack number within that index)
    repack_candidates: Vec<(PackInfo, EnumSet<PackStatus>, RepackReason, usize, usize)>,
    /// The index files
    index_files: Vec<PruneIndex>,
    /// `prune` statistics
    pub stats: PruneStats,
}
598
599impl PrunePlan {
    /// Create a new `PrunePlan`
    ///
    /// Deduplicates packs which are listed in more than one index file; index
    /// files which lose entries this way are flagged as `modified`.
    ///
    /// # Arguments
    ///
    /// * `used_ids` - The ids of the blobs which are used
    /// * `existing_packs` - The ids of the existing packs
    /// * `index_files` - The index files
    fn new(
        used_ids: BTreeMap<BlobId, u8>,
        existing_packs: BTreeMap<PackId, u32>,
        index_files: Vec<(IndexId, IndexFile)>,
    ) -> Self {
        // Pack ids already seen, for the normal and the marked-for-deletion lists
        // respectively; used to keep only the first occurrence of each pack.
        let mut processed_packs = BTreeSet::new();
        let mut processed_packs_delete = BTreeSet::new();
        let mut index_files: Vec<_> = index_files
            .into_iter()
            .map(|(id, index)| {
                let mut modified = false;
                let mut packs: Vec<_> = index
                    .packs
                    .into_iter()
                    // filter out duplicate packs
                    .filter(|p| {
                        // `insert` returns false if the id was already present;
                        // dropping a duplicate marks this index file as modified.
                        let no_duplicate = processed_packs.insert(p.id);
                        modified |= !no_duplicate;
                        no_duplicate
                    })
                    .map(PrunePack::from_index_pack_unmarked)
                    .collect();
                packs.extend(
                    index
                        .packs_to_delete
                        .into_iter()
                        // filter out duplicate packs
                        .filter(|p| {
                            let no_duplicate = processed_packs_delete.insert(p.id);
                            modified |= !no_duplicate;
                            no_duplicate
                        })
                        .map(PrunePack::from_index_pack_marked),
                );

                PruneIndex {
                    id,
                    modified,
                    packs,
                }
            })
            .collect();

        // filter out "normally" indexed packs from packs_to_delete
        for index in &mut index_files {
            let mut modified = false;
            // Drop a delete-marked pack if the same id also appears in the
            // normal pack list of any index file.
            index.packs.retain(|p| {
                !p.delete_mark || {
                    let duplicate = processed_packs.contains(&p.id);
                    modified |= duplicate;
                    !duplicate
                }
            });

            index.modified |= modified;
        }

        Self {
            time: Local::now(),
            used_ids,
            existing_packs,
            repack_candidates: Vec::new(),
            index_files,
            stats: PruneStats::default(),
        }
    }
673
    /// Get a `PrunePlan` from the given `PruneOptions`.
    ///
    /// Reads all index files and existing pack files from the repository,
    /// determines the used blobs, and decides for each pack whether it is
    /// kept, repacked, or (marked for) deletion.
    ///
    /// # Type Parameters
    ///
    /// * `P` - The progress bar type
    /// * `S` - The state the repository is in
    ///
    /// # Arguments
    ///
    /// * `repo` - The repository to get the `PrunePlan` for
    /// * `opts` - The `PruneOptions` to use
    ///
    /// # Errors
    ///
    /// * If `repack_uncompressed` is set and the repository is a version 1 repository
    /// * If `keep_pack` or `keep_delete` is out of range
    pub fn from_prune_options<P: ProgressBars, S: Open>(
        repo: &Repository<P, S>,
        opts: &PruneOptions,
    ) -> RusticResult<Self> {
        let pb = &repo.pb;
        let be = repo.dbe();

        let version = repo.config().version;

        // compression only exists since repository version 2, so repacking
        // uncompressed packs is pointless before that
        if version < 2 && opts.repack_uncompressed {
            return Err(RusticError::new(
                ErrorKind::Unsupported,
                "Repacking uncompressed pack is unsupported in Repository version `{config_version}`.",
            )
            .attach_context("config_version", version.to_string()));
        }

        let mut index_files = Vec::new();

        let p = pb.progress_counter("reading index...");
        let mut index_collector = IndexCollector::new(IndexType::OnlyTrees);

        for index in be.stream_all::<IndexFile>(&p)? {
            let (id, index) = index?;
            index_collector.extend(index.packs.clone());
            // we add the trees from packs_to_delete to the index such that searching for
            // used blobs doesn't abort if they are already marked for deletion
            index_collector.extend(index.packs_to_delete.clone());

            index_files.push((id, index));
        }
        p.finish();

        let (used_ids, total_size) = {
            // Build a temporary global index to walk the snapshots and compute sizes.
            let index = GlobalIndex::new_from_index(index_collector.into_index());
            let total_size = BlobTypeMap::init(|blob_type| index.total_size(blob_type));
            let used_ids = find_used_blobs(be, &index, &opts.ignore_snaps, pb)?;
            (used_ids, total_size)
        };

        // list existing pack files
        let p = pb.progress_spinner("getting packs from repository...");
        let existing_packs: BTreeMap<_, _> = be
            .list_with_size(FileType::Pack)?
            .into_iter()
            .map(|(id, size)| (PackId::from(id), size))
            .collect();
        p.finish();

        let mut pruner = Self::new(used_ids, existing_packs, index_files);
        pruner.count_used_blobs();
        pruner.check()?;
        // Default: only repack cacheable packs for hot/cold repositories.
        let repack_cacheable_only = opts
            .repack_cacheable_only
            .unwrap_or_else(|| repo.config().is_hot == Some(true));
        let pack_sizer =
            total_size.map(|tpe, size| PackSizer::from_config(repo.config(), tpe, size));

        // chrono::Duration cannot represent every std::time::Duration,
        // hence the fallible conversions below.
        pruner.decide_packs(
            Duration::from_std(*opts.keep_pack).map_err(|err| {
                RusticError::with_source(
                    ErrorKind::Internal,
                    "Failed to convert keep_pack duration `{keep_pack}` to std::time::Duration.",
                    err,
                )
                .attach_context("keep_pack", opts.keep_pack.to_string())
            })?,
            Duration::from_std(*opts.keep_delete).map_err(|err| {
                RusticError::with_source(
                    ErrorKind::Internal,
                    "Failed to convert keep_delete duration `{keep_delete}` to std::time::Duration.",
                    err,
                )
                .attach_context("keep_delete", opts.keep_delete.to_string())
            })?,
            repack_cacheable_only,
            opts.repack_uncompressed,
            opts.repack_all,
            &pack_sizer,
        )?;

        pruner.decide_repack(
            &opts.max_repack,
            &opts.max_unused,
            opts.repack_uncompressed || opts.repack_all,
            opts.no_resize,
            &pack_sizer,
        );

        pruner.check_existing_packs()?;
        pruner.filter_index_files(opts.instant_delete);

        Ok(pruner)
    }
784
785    /// This function counts the number of times a blob is used in the index files.
786    fn count_used_blobs(&mut self) {
787        for blob in self
788            .index_files
789            .iter()
790            .flat_map(|index| &index.packs)
791            .flat_map(|pack| &pack.blobs)
792        {
793            if let Some(count) = self.used_ids.get_mut(&blob.id) {
794                // note that duplicates are only counted up to 255. If there are more
795                // duplicates, the number is set to 255. This may imply that later on
796                // not the "best" pack is chosen to have that blob marked as used.
797                *count = count.saturating_add(1);
798            }
799        }
800    }
801
802    /// This function checks whether all used blobs are present in the index files.
803    ///
804    /// # Errors
805    ///
806    /// * If a blob is missing
807    fn check(&self) -> RusticResult<()> {
808        for (id, count) in &self.used_ids {
809            if *count == 0 {
810                return Err(RusticError::new(
811                    ErrorKind::Internal,
812                    "Blob ID `{blob_id}` is missing in index files.",
813                )
814                .attach_context("blob_id", id.to_string())
815                .ask_report());
816            }
817        }
818
819        Ok(())
820    }
821
822    /// Decides what to do with the packs
823    ///
824    /// # Arguments
825    ///
826    /// * `keep_pack` - The minimum duration to keep packs before repacking or removing
827    /// * `keep_delete` - The minimum duration to keep packs marked for deletion
828    /// * `repack_cacheable_only` - Whether to only repack cacheable packs
829    /// * `repack_uncompressed` - Whether to repack packs containing uncompressed blobs
830    /// * `repack_all` - Whether to repack all packs
831    /// * `pack_sizer` - The `PackSizer` for the packs
832    ///
833    /// # Errors
834    ///
835    // TODO: add errors!
836    #[allow(clippy::too_many_lines)]
837    #[allow(clippy::unnecessary_wraps)]
838    fn decide_packs(
839        &mut self,
840        keep_pack: Duration,
841        keep_delete: Duration,
842        repack_cacheable_only: bool,
843        repack_uncompressed: bool,
844        repack_all: bool,
845        pack_sizer: &BlobTypeMap<PackSizer>,
846    ) -> RusticResult<()> {
847        // first process all marked packs then the unmarked ones:
848        // - first processed packs are more likely to have all blobs seen as unused
849        // - if marked packs have used blob but these blobs are all present in
850        //   unmarked packs, we want to perform the deletion!
851        for mark_case in [true, false] {
852            for (index_num, index) in self.index_files.iter_mut().enumerate() {
853                for (pack_num, pack) in index
854                    .packs
855                    .iter_mut()
856                    .enumerate()
857                    .filter(|(_, p)| p.delete_mark == mark_case)
858                {
859                    let pi = PackInfo::from_pack(pack, &mut self.used_ids);
860                    //update used/unused stats
861                    self.stats.blobs[pi.blob_type].used += u64::from(pi.used_blobs);
862                    self.stats.blobs[pi.blob_type].unused += u64::from(pi.unused_blobs);
863                    self.stats.size[pi.blob_type].used += u64::from(pi.used_size);
864                    self.stats.size[pi.blob_type].unused += u64::from(pi.unused_size);
865                    let mut status = EnumSet::empty();
866
867                    // Various checks to determine if packs need to be kept
868                    let too_young = pack.time > Some(self.time - keep_pack);
869                    if too_young && !pack.delete_mark {
870                        _ = status.insert(PackStatus::TooYoung);
871                    }
872                    let keep_uncacheable = repack_cacheable_only && !pack.blob_type.is_cacheable();
873
874                    let to_compress = repack_uncompressed && !pack.is_compressed();
875                    if to_compress {
876                        _ = status.insert(PackStatus::NotCompressed);
877                    }
878                    let size_mismatch = !pack_sizer[pack.blob_type].size_ok(pack.size);
879                    if pack_sizer[pack.blob_type].is_too_small(pack.size) {
880                        _ = status.insert(PackStatus::TooSmall);
881                    }
882                    if pack_sizer[pack.blob_type].is_too_large(pack.size) {
883                        _ = status.insert(PackStatus::TooLarge);
884                    }
885                    match (pack.delete_mark, pi.used_blobs, pi.unused_blobs) {
886                        (false, 0, _) => {
887                            // unused pack
888                            self.stats.packs.unused += 1;
889                            _ = status.insert(PackStatus::HasUnusedBlobs);
890                            if too_young {
891                                // keep packs which are too young
892                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
893                            } else {
894                                pack.set_todo(PackToDo::MarkDelete, &pi, status, &mut self.stats);
895                            }
896                        }
897                        (false, 1.., 0) => {
898                            // used pack
899                            self.stats.packs.used += 1;
900                            _ = status.insert(PackStatus::HasUsedBlobs);
901                            if too_young || keep_uncacheable {
902                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
903                            } else if to_compress || repack_all {
904                                self.repack_candidates.push((
905                                    pi,
906                                    status,
907                                    RepackReason::ToCompress,
908                                    index_num,
909                                    pack_num,
910                                ));
911                            } else if size_mismatch {
912                                self.repack_candidates.push((
913                                    pi,
914                                    status,
915                                    RepackReason::SizeMismatch,
916                                    index_num,
917                                    pack_num,
918                                ));
919                            } else {
920                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
921                            }
922                        }
923
924                        (false, 1.., 1..) => {
925                            // partly used pack
926                            self.stats.packs.partly_used += 1;
927                            status
928                                .insert_all(PackStatus::HasUsedBlobs | PackStatus::HasUnusedBlobs);
929
930                            if too_young || keep_uncacheable {
931                                // keep packs which are too young and non-cacheable packs if requested
932                                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
933                            } else {
934                                // other partly used pack => candidate for repacking
935                                self.repack_candidates.push((
936                                    pi,
937                                    status,
938                                    RepackReason::PartlyUsed,
939                                    index_num,
940                                    pack_num,
941                                ));
942                            }
943                        }
944                        (true, 0, _) => {
945                            _ = status.insert(PackStatus::Marked);
946                            match pack.time {
947                                // unneeded and marked pack => check if we can remove it.
948                                Some(local_date_time)
949                                    if self.time - local_date_time >= keep_delete =>
950                                {
951                                    _ = status.insert(PackStatus::TooYoung);
952                                    pack.set_todo(PackToDo::Delete, &pi, status, &mut self.stats);
953                                }
954                                None => {
955                                    warn!(
956                                        "pack to delete {}: no time set, this should not happen! Keeping this pack.",
957                                        pack.id
958                                    );
959                                    _ = status.insert(PackStatus::TimeNotSet);
960                                    pack.set_todo(
961                                        PackToDo::KeepMarkedAndCorrect,
962                                        &pi,
963                                        status,
964                                        &mut self.stats,
965                                    );
966                                }
967                                Some(_) => pack.set_todo(
968                                    PackToDo::KeepMarked,
969                                    &pi,
970                                    status,
971                                    &mut self.stats,
972                                ),
973                            }
974                        }
975                        (true, 1.., _) => {
976                            status.insert_all(PackStatus::Marked | PackStatus::HasUsedBlobs);
977                            // needed blobs; mark this pack for recovery
978                            pack.set_todo(PackToDo::Recover, &pi, status, &mut self.stats);
979                        }
980                    }
981                }
982            }
983        }
984        Ok(())
985    }
986
    /// Decides if packs should be repacked
    ///
    /// The previously collected repack candidates (sorted so that the most
    /// rewarding packs come first, see the `Ord` impl of `PackInfo`) are
    /// either marked `Repack` or `Keep`, honoring the given repack and
    /// unused-space limits. Candidates with reason `SizeMismatch` are
    /// deferred and only repacked if their blob type is repacked anyway or
    /// enough data accumulates to fill a target-sized pack.
    ///
    /// # Arguments
    ///
    /// * `max_repack` - The maximum size of packs to repack
    /// * `max_unused` - The maximum size of unused blobs
    /// * `repack_uncompressed` - Whether to repack packs containing uncompressed blobs
    /// * `no_resize` - Whether to resize packs
    /// * `pack_sizer` - The `PackSizer` for the packs
    fn decide_repack(
        &mut self,
        max_repack: &LimitOption,
        max_unused: &LimitOption,
        repack_uncompressed: bool,
        no_resize: bool,
        pack_sizer: &BlobTypeMap<PackSizer>,
    ) {
        // Convert the unused-space limit into an absolute byte count.
        // If uncompressed packs must be repacked, no unused space is tolerated.
        let max_unused = match (repack_uncompressed, max_unused) {
            (true, _) => 0,
            (false, LimitOption::Unlimited) => u64::MAX,
            (false, LimitOption::Size(size)) => size.as_u64(),
            // if a percentage is given, we want to have
            // unused <= p/100 * size_after = p/100 * (size_used + unused)
            // which equals (1 - p/100) * unused <= p/100 * size_used
            (false, LimitOption::Percentage(p)) => (p * self.stats.size_sum().used) / (100 - p),
        };

        // Convert the repack limit into an absolute byte count.
        let max_repack = match max_repack {
            LimitOption::Unlimited => u64::MAX,
            LimitOption::Size(size) => size.as_u64(),
            LimitOption::Percentage(p) => (p * self.stats.size_sum().total()) / 100,
        };

        // Sort candidates so that tree packs and packs with the highest
        // unused/used ratio come first (see `Ord for PackInfo`).
        self.repack_candidates.sort_unstable_by_key(|rc| rc.0);
        let mut resize_packs = BlobTypeMap::<Vec<_>>::default();
        let mut do_repack = BlobTypeMap::default();
        let mut repack_size = BlobTypeMap::<u64>::default();

        for (pi, status, repack_reason, index_num, pack_num) in
            std::mem::take(&mut self.repack_candidates)
        {
            let pack = &mut self.index_files[index_num].packs[pack_num];
            let blob_type = pi.blob_type;

            let total_repack_size: u64 = repack_size.into_values().sum();
            if total_repack_size + u64::from(pi.used_size) >= max_repack
                || (self.stats.size_sum().unused_after_prune() < max_unused
                    && repack_reason == RepackReason::PartlyUsed
                    && blob_type == BlobType::Data)
                || (repack_reason == RepackReason::SizeMismatch && no_resize)
            {
                // repack limit reached, repacking not needed for the unused-space
                // goal, or resizing is disabled => keep the pack as-is
                pack.set_todo(PackToDo::Keep, &pi, status, &mut self.stats);
            } else if repack_reason == RepackReason::SizeMismatch {
                // defer the decision for size-mismatched packs, see loop below
                resize_packs[blob_type].push((pi, status, index_num, pack_num));
                repack_size[blob_type] += u64::from(pi.used_size);
            } else {
                pack.set_todo(PackToDo::Repack, &pi, status, &mut self.stats);
                repack_size[blob_type] += u64::from(pi.used_size);
                do_repack[blob_type] = true;
            }
        }
        for (blob_type, resize_packs) in resize_packs {
            // packs in resize_packs are only repacked if we anyway repack this blob type or
            // if the target pack size is reached for the blob type.
            let todo = if do_repack[blob_type]
                || repack_size[blob_type] > u64::from(pack_sizer[blob_type].pack_size())
            {
                PackToDo::Repack
            } else {
                PackToDo::Keep
            };
            for (pi, status, index_num, pack_num) in resize_packs {
                let pack = &mut self.index_files[index_num].packs[pack_num];
                pack.set_todo(todo, &pi, status, &mut self.stats);
            }
        }
    }
1068
1069    /// Checks if the existing packs are ok
1070    ///
1071    /// # Errors
1072    ///
1073    /// * If a pack is undecided
1074    /// * If the size of a pack does not match
1075    /// * If a pack does not exist
1076    fn check_existing_packs(&mut self) -> RusticResult<()> {
1077        for pack in self.index_files.iter().flat_map(|index| &index.packs) {
1078            let existing_size = self.existing_packs.remove(&pack.id);
1079
1080            // TODO: Unused Packs which don't exist (i.e. only existing in index)
1081            let check_size = || {
1082                match existing_size {
1083                    Some(size) if size == pack.size => Ok(()), // size is ok => continue
1084                    Some(size) => Err(RusticError::new(
1085                        ErrorKind::Internal,
1086                        "Pack size `{size_in_pack_real}` of id `{pack_id}` does not match the expected size `{size_in_index_expected}` in the index file. ",
1087                    )
1088                    .attach_context("pack_id", pack.id.to_string())
1089                    .attach_context("size_in_index_expected", pack.size.to_string())
1090                    .attach_context("size_in_pack_real", size.to_string())
1091                    .ask_report()),
1092                    None => Err(RusticError::new(ErrorKind::Internal, "Pack `{pack_id}` does not exist.")
1093                        .attach_context("pack_id", pack.id.to_string())
1094                        .ask_report()),
1095                }
1096            };
1097
1098            match pack.to_do {
1099                PackToDo::Undecided => {
1100                    return Err(RusticError::new(
1101                        ErrorKind::Internal,
1102                        "Pack `{pack_id}` got no decision what to do with it!",
1103                    )
1104                    .attach_context("pack_id", pack.id.to_string())
1105                    .ask_report());
1106                }
1107                PackToDo::Keep | PackToDo::Recover => {
1108                    for blob in &pack.blobs {
1109                        _ = self.used_ids.remove(&blob.id);
1110                    }
1111                    check_size()?;
1112                }
1113                PackToDo::Repack => {
1114                    check_size()?;
1115                }
1116                PackToDo::MarkDelete
1117                | PackToDo::Delete
1118                | PackToDo::KeepMarked
1119                | PackToDo::KeepMarkedAndCorrect => {}
1120            }
1121        }
1122
1123        // all remaining packs in existing_packs are unreferenced packs
1124        for size in self.existing_packs.values() {
1125            self.stats.size_unref += u64::from(*size);
1126        }
1127        self.stats.packs_unref = self.existing_packs.len() as u64;
1128
1129        Ok(())
1130    }
1131
1132    /// Filter out index files which do not need processing
1133    ///
1134    /// # Arguments
1135    ///
1136    /// * `instant_delete` - Whether to instantly delete unreferenced packs
1137    fn filter_index_files(&mut self, instant_delete: bool) {
1138        let mut any_must_modify = false;
1139        self.stats.index_files = self.index_files.len() as u64;
1140        // filter out only the index files which need processing
1141        self.index_files.retain(|index| {
1142            // index must be processed if it has been modified
1143            // or if any pack is not kept
1144            let must_modify = index.modified
1145                || index.packs.iter().any(|p| {
1146                    p.to_do != PackToDo::Keep && (instant_delete || p.to_do != PackToDo::KeepMarked)
1147                });
1148
1149            any_must_modify |= must_modify;
1150
1151            // also process index files which are too small (i.e. rebuild them)
1152            must_modify || index.len() < constants::MIN_INDEX_LEN
1153        });
1154
1155        if !any_must_modify && self.index_files.len() == 1 {
1156            // only one index file to process but only because it is too small
1157            self.index_files.clear();
1158        }
1159
1160        self.stats.index_files_rebuild = self.index_files.len() as u64;
1161
1162        // TODO: Sort index files such that files with deletes come first and files with
1163        // repacks come at end
1164    }
1165
1166    /// Get the list of packs-to-repack from the [`PrunePlan`].
1167    #[must_use]
1168    pub fn repack_packs(&self) -> Vec<PackId> {
1169        self.index_files
1170            .iter()
1171            .flat_map(|index| &index.packs)
1172            .filter(|pack| pack.to_do == PackToDo::Repack)
1173            .map(|pack| pack.id)
1174            .collect()
1175    }
1176
    /// Perform the pruning on the given repository.
    ///
    /// Deprecated thin wrapper which simply delegates to [`Repository::prune`].
    ///
    /// # Arguments
    ///
    /// * `repo` - The repository to prune
    /// * `opts` - The options for the pruning
    ///
    /// # Errors
    ///
    /// * If the repository is in append-only mode
    /// * If a pack has no decision
    ///
    /// # Returns
    ///
    /// * `Ok(())` - If the pruning was successful
    ///
    /// # Panics
    ///
    /// TODO! In weird circumstances, should be fixed.
    #[allow(clippy::significant_drop_tightening)]
    #[allow(clippy::too_many_lines)]
    #[deprecated(since = "0.5.2", note = "Use `Repository::prune()` instead.")]
    pub fn do_prune<P: ProgressBars, S: Open>(
        self,
        repo: &Repository<P, S>,
        opts: &PruneOptions,
    ) -> RusticResult<()> {
        repo.prune(opts, self)
    }
1206}
1207
/// Perform the pruning on the given repository.
///
/// Executes a previously computed [`PrunePlan`]: unreferenced packs are
/// deleted or marked for deletion, packs to repack are rewritten via
/// [`Repacker`]s, and the index files are rebuilt accordingly.
///
/// # Arguments
///
/// * `repo` - The repository to prune
/// * `opts` - The options for the pruning
/// * `prune_plan` - The plan for the pruning
///
/// # Errors
///
/// * If the repository is in append-only mode
/// * If a pack has no decision
///
/// # Returns
///
/// * `Ok(())` - If the pruning was successful
///
/// # Panics
///
/// TODO! In weird circumstances, should be fixed.
#[allow(clippy::significant_drop_tightening)]
#[allow(clippy::too_many_lines)]
pub(crate) fn prune_repository<P: ProgressBars, S: Open>(
    repo: &Repository<P, S>,
    opts: &PruneOptions,
    prune_plan: PrunePlan,
) -> RusticResult<()> {
    // Pruning removes data and is therefore forbidden in append-only mode.
    if repo.config().append_only == Some(true) {
        return Err(RusticError::new(
            ErrorKind::AppendOnly,
            "Pruning is not allowed in append-only repositories. Please disable append-only mode first, if you know what you are doing. Aborting.",
        ));
    }
    // Warm up all pack files which will be read for repacking.
    repo.warm_up_wait(prune_plan.repack_packs().into_iter())?;
    let be = repo.dbe();
    let pb = &repo.pb;

    // Shared indexer used to write the new index files.
    let indexer = Indexer::new_unindexed(be.clone()).into_shared();

    // Calculate an approximation of sizes after pruning.
    // The size actually is:
    // total_size_of_all_blobs + total_size_of_pack_headers + #packs * pack_overhead
    // This is hard/impossible to compute because:
    // - the size of blobs can change during repacking if compression is changed
    // - the size of pack headers depends on whether blobs are compressed or not
    // - we don't know the number of packs generated by repacking
    // So, we simply use the current size of the blobs and an estimation of the pack
    // header size.

    let size_after_prune = BlobTypeMap::init(|blob_type| {
        prune_plan.stats.size[blob_type].total_after_prune()
            + prune_plan.stats.blobs[blob_type].total_after_prune()
                * u64::from(HeaderEntry::ENTRY_LEN_COMPRESSED)
    });

    let tree_repacker = Repacker::new(
        be.clone(),
        BlobType::Tree,
        indexer.clone(),
        repo.config(),
        size_after_prune[BlobType::Tree],
    )?;

    let data_repacker = Repacker::new(
        be.clone(),
        BlobType::Data,
        indexer.clone(),
        repo.config(),
        size_after_prune[BlobType::Data],
    )?;

    // mark unreferenced packs for deletion
    if !prune_plan.existing_packs.is_empty() {
        if opts.instant_delete {
            let p = pb.progress_counter("removing unindexed packs...");
            let existing_packs: Vec<_> = prune_plan.existing_packs.into_keys().collect();
            be.delete_list(true, existing_packs.iter(), p)?;
        } else {
            let p = pb.progress_counter("marking unneeded unindexed pack files for deletion...");
            p.set_length(prune_plan.existing_packs.len().try_into().unwrap());
            for (id, size) in prune_plan.existing_packs {
                // add the pack to the "removal" section of the new index
                let pack = IndexPack {
                    id,
                    size: Some(size),
                    time: Some(Local::now()),
                    blobs: Vec::new(),
                };
                indexer.write().unwrap().add_remove(pack)?;
                p.inc(1);
            }
            p.finish();
        }
    }

    // process packs by index_file
    let p = match (
        prune_plan.index_files.is_empty(),
        prune_plan.stats.packs.repack > 0,
    ) {
        (true, _) => {
            info!("nothing to do!");
            pb.progress_hidden()
        }
        // TODO: Use a MultiProgressBar here
        (false, true) => pb.progress_bytes("repacking // rebuilding index..."),
        (false, false) => pb.progress_spinner("rebuilding index..."),
    };

    p.set_length(prune_plan.stats.size_sum().repack - prune_plan.stats.size_sum().repackrm);

    let mut indexes_remove = Vec::new();
    // pack-removal lists are shared with the parallel loop below
    let tree_packs_remove = Arc::new(Mutex::new(Vec::new()));
    let data_packs_remove = Arc::new(Mutex::new(Vec::new()));

    let delete_pack = |pack: PrunePack| {
        // delete pack
        match pack.blob_type {
            BlobType::Data => data_packs_remove.lock().unwrap().push(pack.id),
            BlobType::Tree => tree_packs_remove.lock().unwrap().push(pack.id),
        }
    };

    // used_ids is shared across the parallel repack loop below
    let used_ids = Arc::new(Mutex::new(prune_plan.used_ids));

    // collect all packs; all old index files are scheduled for removal
    let packs: Vec<_> = prune_plan
        .index_files
        .into_iter()
        .inspect(|index| {
            indexes_remove.push(index.id);
        })
        .flat_map(|index| index.packs)
        .collect();

    // remove old index files early if requested
    if !indexes_remove.is_empty() && opts.early_delete_index {
        let p = pb.progress_counter("removing old index files...");
        be.delete_list(true, indexes_remove.iter(), p)?;
    }

    // write new pack files and index files
    packs
        .into_par_iter()
        .try_for_each(|pack| -> RusticResult<_> {
            match pack.to_do {
                PackToDo::Undecided => {
                    return Err(RusticError::new(
                        ErrorKind::Internal,
                        "Pack `{pack_id}` got no decision what to do with it!",
                    )
                    .attach_context("pack_id", pack.id.to_string())
                    .ask_report());
                }
                PackToDo::Keep => {
                    // keep pack: add to new index
                    let pack = pack.into_index_pack();
                    indexer.write().unwrap().add(pack)?;
                }
                PackToDo::Repack => {
                    // TODO: repack in parallel
                    for blob in &pack.blobs {
                        if used_ids.lock().unwrap().remove(&blob.id).is_none() {
                            // don't save duplicate blobs
                            continue;
                        }

                        let repacker = match blob.tpe {
                            BlobType::Data => &data_repacker,
                            BlobType::Tree => &tree_repacker,
                        };
                        if opts.fast_repack {
                            repacker.add_fast(&pack.id, blob)?;
                        } else {
                            repacker.add(&pack.id, blob)?;
                        }
                        p.inc(u64::from(blob.length));
                    }
                    if opts.instant_delete {
                        delete_pack(pack);
                    } else {
                        // mark pack for removal
                        let pack = pack.into_index_pack_with_time(prune_plan.time);
                        indexer.write().unwrap().add_remove(pack)?;
                    }
                }
                PackToDo::MarkDelete => {
                    if opts.instant_delete {
                        delete_pack(pack);
                    } else {
                        // mark pack for removal
                        let pack = pack.into_index_pack_with_time(prune_plan.time);
                        indexer.write().unwrap().add_remove(pack)?;
                    }
                }
                PackToDo::KeepMarked | PackToDo::KeepMarkedAndCorrect => {
                    if opts.instant_delete {
                        delete_pack(pack);
                    } else {
                        // keep pack: add to new index; keep the timestamp.
                        // Note the timestamp shouldn't be None here, however if it is not set, use the current time to heal the entry!
                        let time = pack.time.unwrap_or(prune_plan.time);
                        let pack = pack.into_index_pack_with_time(time);
                        indexer.write().unwrap().add_remove(pack)?;
                    }
                }
                PackToDo::Recover => {
                    // recover pack: add to new index in section packs
                    let pack = pack.into_index_pack_with_time(prune_plan.time);
                    indexer.write().unwrap().add(pack)?;
                }
                PackToDo::Delete => delete_pack(pack),
            }
            Ok(())
        })?;
    // flush partially filled repack targets and write the final index
    _ = tree_repacker.finalize()?;
    _ = data_repacker.finalize()?;
    indexer.write().unwrap().finalize()?;
    p.finish();

    // remove old index files first as they may reference pack files which are removed soon.
    if !indexes_remove.is_empty() && !opts.early_delete_index {
        let p = pb.progress_counter("removing old index files...");
        be.delete_list(true, indexes_remove.iter(), p)?;
    }

    // get variable out of Arc<Mutex<_>>
    let data_packs_remove = data_packs_remove.lock().unwrap();
    if !data_packs_remove.is_empty() {
        let p = pb.progress_counter("removing old data packs...");
        be.delete_list(false, data_packs_remove.iter(), p)?;
    }

    // get variable out of Arc<Mutex<_>>
    let tree_packs_remove = tree_packs_remove.lock().unwrap();
    if !tree_packs_remove.is_empty() {
        let p = pb.progress_counter("removing old tree packs...");
        be.delete_list(true, tree_packs_remove.iter(), p)?;
    }

    Ok(())
}
1448
/// `PackInfo` contains information about a pack which is needed to decide what to do with the pack.
///
/// It aggregates, per pack, how many blobs (and how many bytes) are still
/// used versus unused. The total order on `PackInfo` is used to sort
/// repack candidates so the most rewarding packs come first.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
struct PackInfo {
    /// What type of blobs are in the pack
    blob_type: BlobType,
    /// The number of used blobs in the pack
    used_blobs: u16,
    /// The number of unused blobs in the pack
    unused_blobs: u16,
    /// The size of the used blobs in the pack, in bytes
    used_size: u32,
    /// The size of the unused blobs in the pack, in bytes
    unused_size: u32,
}
1463
impl PartialOrd<Self> for PackInfo {
    /// Delegates to the total order defined by the [`Ord`] impl below.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
1469
1470impl Ord for PackInfo {
1471    fn cmp(&self, other: &Self) -> Ordering {
1472        // first order by blob type such that tree packs are picked first
1473        self.blob_type.cmp(&other.blob_type).then(
1474            // then order such that packs with highest
1475            // ratio unused/used space are picked first.
1476            // This is equivalent to ordering by unused / total space.
1477            (u64::from(other.unused_size) * u64::from(self.used_size))
1478                .cmp(&(u64::from(self.unused_size) * u64::from(other.used_size))),
1479        )
1480    }
1481}
1482
impl PackInfo {
    /// Create a `PackInfo` from a `PrunePack`.
    ///
    /// Counts the used/unused blobs and their byte sizes for `pack`, updating
    /// the per-blob counters in `used_ids` on the way: each counter is
    /// decremented per processed occurrence, and a counter of `0` tells
    /// subsequently processed packs that the blob is already covered.
    ///
    /// # Arguments
    ///
    /// * `pack` - The `PrunePack` to create the `PackInfo` from
    /// * `used_ids` - The `BTreeMap` of used ids
    fn from_pack(pack: &PrunePack, used_ids: &mut BTreeMap<BlobId, u8>) -> Self {
        let mut pi = Self {
            blob_type: pack.blob_type,
            used_blobs: 0,
            unused_blobs: 0,
            used_size: 0,
            unused_size: 0,
        };

        // We search all blobs in the pack for needed ones. We do this by already marking
        // and decreasing the used blob counter for the processed blobs. If the counter
        // was decreased to 0, the blob and therefore the pack is actually used.
        // Note that by this processing, we are also able to handle duplicate blobs within a pack
        // correctly.
        // If we found a needed blob, we stop and process the information that the pack is actually needed.
        let first_needed = pack.blobs.iter().position(|blob| {
            match used_ids.get_mut(&blob.id) {
                // blob is not in used_ids, or some other pack already covers it
                None | Some(0) => {
                    pi.unused_size += blob.length;
                    pi.unused_blobs += 1;
                }
                Some(count) => {
                    // decrease counter
                    *count -= 1;
                    if *count == 0 {
                        // blob is actually needed
                        pi.used_size += blob.length;
                        pi.used_blobs += 1;
                        return true; // break the search
                    }
                    // blob is not needed
                    pi.unused_size += blob.length;
                    pi.unused_blobs += 1;
                }
            }
            false // continue with next blob
        });

        if let Some(first_needed) = first_needed {
            // The pack is actually needed.
            // We reprocess the blobs up to the first needed one and mark all blobs which are generally needed as used.
            for blob in &pack.blobs[..first_needed] {
                match used_ids.get_mut(&blob.id) {
                    None | Some(0) => {} // already correctly marked
                    Some(count) => {
                        // remark blob as used: move its stats from "unused" to "used"
                        pi.unused_size -= blob.length;
                        pi.unused_blobs -= 1;
                        pi.used_size += blob.length;
                        pi.used_blobs += 1;
                        *count = 0; // count = 0 indicates to other packs that the blob is not needed anymore.
                    }
                }
            }
            // Then we process the remaining blobs and mark all blobs which are generally needed as used in this blob
            for blob in &pack.blobs[first_needed + 1..] {
                match used_ids.get_mut(&blob.id) {
                    None | Some(0) => {
                        pi.unused_size += blob.length;
                        pi.unused_blobs += 1;
                    }
                    Some(count) => {
                        // blob is used in this pack
                        pi.used_size += blob.length;
                        pi.used_blobs += 1;
                        *count = 0; // count = 0 indicates to other packs that the blob is not needed anymore.
                    }
                }
            }
        }

        pi
    }
}
1564
1565/// Find used blobs in repo and return a map of used ids.
1566///
1567/// # Arguments
1568///
1569/// * `index` - The index to use
1570/// * `ignore_snaps` - The snapshots to ignore
1571/// * `pb` - The progress bars
1572///
1573/// # Errors
1574///
1575// TODO!: add errors!
1576fn find_used_blobs(
1577    be: &impl DecryptReadBackend,
1578    index: &impl ReadGlobalIndex,
1579    ignore_snaps: &[SnapshotId],
1580    pb: &impl ProgressBars,
1581) -> RusticResult<BTreeMap<BlobId, u8>> {
1582    let ignore_snaps: BTreeSet<_> = ignore_snaps.iter().collect();
1583
1584    let p = pb.progress_counter("reading snapshots...");
1585    let list: Vec<_> = be
1586        .list(FileType::Snapshot)?
1587        .into_iter()
1588        .map(SnapshotId::from)
1589        .filter(|id| !ignore_snaps.contains(&id))
1590        .collect();
1591    let snap_trees: Vec<_> = be
1592        .stream_list::<SnapshotFile>(&list, &p)?
1593        .into_iter()
1594        .map_ok(|(_, snap)| snap.tree)
1595        .try_collect()?;
1596    p.finish();
1597
1598    let mut ids: BTreeMap<_, _> = snap_trees
1599        .iter()
1600        .map(|id| (BlobId::from(**id), 0))
1601        .collect();
1602    let p = pb.progress_counter("finding used blobs...");
1603
1604    let mut tree_streamer = TreeStreamerOnce::new(be, index, snap_trees, p)?;
1605    while let Some(item) = tree_streamer.next().transpose()? {
1606        let (_, tree) = item;
1607        for node in tree.nodes {
1608            match node.node_type {
1609                NodeType::File => {
1610                    ids.extend(
1611                        node.content
1612                            .iter()
1613                            .flatten()
1614                            .map(|id| (BlobId::from(**id), 0)),
1615                    );
1616                }
1617                NodeType::Dir => {
1618                    _ = ids.insert(BlobId::from(*node.subtree.unwrap()), 0);
1619                }
1620                _ => {} // nothing to do
1621            }
1622        }
1623    }
1624
1625    Ok(ids)
1626}