tmux_backup/management/
compaction.rs

1//! Allows to keep the number of backup files under control.
2
3use std::fmt;
4
5use chrono::{Datelike, Timelike};
6use chrono::{Duration, Local};
7use itertools::Itertools;
8
9use super::backup::{Backup, BackupStatus};
10
/// Compaction strategy applied to a catalog of backups.
///
/// A strategy classifies every backup as either retainable (it must be kept) or purgeable
/// (it may be deleted).
#[derive(Debug, Clone)]
pub enum Strategy {
    /// Retain only the `k` newest backups; everything older is purgeable.
    KeepMostRecent {
        /// How many of the most recent backup files are retained.
        k: usize,
    },

    /// Time-window based retention: the older a backup, the coarser the granularity at
    /// which backups are retained.
    ///
    /// This only pays off if you save _very_ often, probably in an automated manner. The
    /// exact rules are described on the method [`Strategy::plan`].
    Classic,
}
28
29impl Strategy {
30    /// Return a new simple strategy.
31    pub fn most_recent(k: usize) -> Self {
32        Self::KeepMostRecent { k }
33    }
34
35    /// Determine which backup files should be kept.
36    ///
37    /// The `backup_files` are assumed to be sorted from oldest to newest.
38    ///
39    /// # KeepMostRecent strategy
40    ///
41    /// Simply splits the list of all backups into 2 lists: the `k` recent ones (or less if the
42    /// catalog does not contain as much) and the remaining ones are considered outdated
43    /// (purgeable).
44    ///
45    /// # Classic strategy
46    ///
47    /// Its goal is to keep
48    ///
49    /// - the lastest backup per hour for the past 24 hours (max 23 backups - exclude the past hour),
50    /// - the lastest backup per day for the past 7 days (max 6 backups - exclude the past 24 hours),
51    /// - the lastest backup per week of the past 4 weeks (max 3 backups - exclude the past week),
52    /// - the lastest backup per month of this year (max 11 backups - exclude the past month).
53    ///
54    /// The time windows above are a partition; they do not overlap. Within each partition,
55    /// only the most recent backup is kept.
56    ///
57    pub fn plan<'a>(&self, backups: &'a [Backup]) -> Plan<'a> {
58        match self {
59            Strategy::KeepMostRecent { k } => {
60                let k = std::cmp::min(backups.len(), *k);
61                let index = std::cmp::max(0, backups.len() - k);
62                let (outdated_backups, recent_backups) = backups.split_at(index);
63
64                let mut statuses = vec![];
65                statuses.extend(
66                    outdated_backups
67                        .iter()
68                        .map(|backup| (backup, BackupStatus::Purgeable)),
69                );
70                statuses.extend(
71                    recent_backups
72                        .iter()
73                        .map(|backup| (backup, BackupStatus::Retainable)),
74                );
75
76                Plan {
77                    purgeable: outdated_backups.iter().collect(),
78                    retainable: recent_backups.iter().collect(),
79                    statuses,
80                }
81            }
82
83            Strategy::Classic => {
84                let now = Local::now().naive_local();
85                let _24h_ago = now - Duration::days(1);
86                let _7d_ago = now - Duration::days(7);
87                let _4w_ago = now - Duration::weeks(4);
88                let _year_ago = now - Duration::days(365);
89
90                // Last 24 h, grouped by hour
91                let last_24h_per_hour: Vec<_> = backups
92                    .iter()
93                    .filter(|&b| b.creation_date > _24h_ago)
94                    .chunk_by(|&b| b.creation_date.hour())
95                    .into_iter()
96                    .map(|(_key, group)| group.collect::<Vec<_>>())
97                    .filter_map(|group| group.last().cloned())
98                    .collect();
99
100                // Last 7 days excluding the last 24 h, grouped by day
101                let last_7d_per_day: Vec<_> = backups
102                    .iter()
103                    .filter(|&b| _24h_ago > b.creation_date && b.creation_date >= _7d_ago)
104                    .chunk_by(|&b| b.creation_date.day())
105                    .into_iter()
106                    .map(|(_key, group)| group.collect::<Vec<_>>())
107                    .filter_map(|group| group.last().cloned())
108                    .collect();
109
110                // Last 4 weeks excluding the last 7 days, grouped by week number
111                let last_4w_per_isoweek: Vec<_> = backups
112                    .iter()
113                    .filter(|&b| _7d_ago > b.creation_date && b.creation_date >= _4w_ago)
114                    .chunk_by(|&b| b.creation_date.iso_week())
115                    .into_iter()
116                    .map(|(_key, group)| group.collect::<Vec<_>>())
117                    .filter_map(|group| group.last().cloned())
118                    .collect();
119
120                // Last year (365 days) excluding the last 4 weeks, grouped by month
121                let last_year_per_month: Vec<_> = backups
122                    .iter()
123                    .filter(|&b| _4w_ago > b.creation_date && b.creation_date >= _year_ago)
124                    .chunk_by(|&b| b.creation_date.month())
125                    .into_iter()
126                    .map(|(_key, group)| group.collect::<Vec<_>>())
127                    .filter_map(|group| group.last().cloned())
128                    .collect();
129
130                let retainable: Vec<_> = vec![
131                    last_year_per_month,
132                    last_4w_per_isoweek,
133                    last_7d_per_day,
134                    last_24h_per_hour,
135                ]
136                .into_iter()
137                .flatten()
138                .collect();
139
140                let retain_set: std::collections::HashSet<&Backup> =
141                    retainable.iter().copied().collect();
142
143                let purgeable: Vec<_> = backups
144                    .iter()
145                    .filter(|&b| !retain_set.contains(b))
146                    .collect();
147
148                let statuses: Vec<_> = backups
149                    .iter()
150                    .map(|b| {
151                        if retain_set.contains(b) {
152                            (b, BackupStatus::Retainable)
153                        } else {
154                            (b, BackupStatus::Purgeable)
155                        }
156                    })
157                    .collect();
158
159                Plan {
160                    purgeable,
161                    retainable,
162                    statuses,
163                }
164            }
165        }
166    }
167}
168
169impl fmt::Display for Strategy {
170    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
171        match self {
172            Strategy::KeepMostRecent { k } => {
173                write!(f, "KeepMostRecent: {k}")
174            }
175            Strategy::Classic => write!(f, "Classic"),
176        }
177    }
178}
179
180/// Describes what the strategy would do.
181pub struct Plan<'a> {
182    /// List of backup files that should be purged.
183    pub purgeable: Vec<&'a Backup>,
184
185    /// List of backup files that should be kept.
186    pub retainable: Vec<&'a Backup>,
187
188    /// Sorted list of backup files along with their status (purgeable/retainable).
189    pub statuses: Vec<(&'a Backup, BackupStatus)>,
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use std::path::PathBuf;

    /// Create a backup at the given date/time. The path encodes the datetime for easy debugging.
    ///
    /// Panics if the arguments do not form a valid date/time.
    fn backup_at(year: i32, month: u32, day: u32, hour: u32, min: u32, sec: u32) -> Backup {
        let dt = NaiveDate::from_ymd_opt(year, month, day)
            .expect("test dates must be valid")
            .and_hms_opt(hour, min, sec)
            .expect("test times must be valid");
        Backup {
            filepath: PathBuf::from(format!(
                "/backups/backup-{}.tar.zst",
                dt.format("%Y%m%dT%H%M%S")
            )),
            creation_date: dt,
        }
    }

    /// Generate `count` backups, one per hour, starting from 2024-06-01T00:00:00.
    ///
    /// The dates are derived by adding hours to the base datetime, so the sequence rolls
    /// over days and months correctly whatever the value of `count`.
    fn generate_hourly_backups(count: usize) -> Vec<Backup> {
        let base = NaiveDate::from_ymd_opt(2024, 6, 1)
            .expect("the base date is valid")
            .and_hms_opt(0, 0, 0)
            .expect("the base time is valid");

        (0..count)
            .map(|i| {
                let offset = i64::try_from(i).expect("the backup count fits in an i64");
                let dt = base + Duration::hours(offset);
                backup_at(dt.year(), dt.month(), dt.day(), dt.hour(), 0, 0)
            })
            .collect()
    }

    mod keep_most_recent_strategy {
        use super::*;

        #[test]
        fn empty_catalog_produces_empty_plan() {
            let strategy = Strategy::most_recent(5);
            let backups: Vec<Backup> = vec![];

            let plan = strategy.plan(&backups);

            assert!(plan.purgeable.is_empty());
            assert!(plan.retainable.is_empty());
            assert!(plan.statuses.is_empty());
        }

        #[test]
        fn single_backup_when_k_is_one() {
            let strategy = Strategy::most_recent(1);
            let backups = vec![backup_at(2024, 6, 15, 10, 0, 0)];

            let plan = strategy.plan(&backups);

            assert!(plan.purgeable.is_empty());
            assert_eq!(plan.retainable.len(), 1);
        }

        #[test]
        fn single_backup_when_k_exceeds_count() {
            let strategy = Strategy::most_recent(10);
            let backups = vec![backup_at(2024, 6, 15, 10, 0, 0)];

            let plan = strategy.plan(&backups);

            // Should keep the one backup we have, not fail
            assert!(plan.purgeable.is_empty());
            assert_eq!(plan.retainable.len(), 1);
        }

        #[test]
        fn keeps_exactly_k_most_recent() {
            let strategy = Strategy::most_recent(3);
            let backups = vec![
                backup_at(2024, 6, 15, 8, 0, 0),  // oldest - purgeable
                backup_at(2024, 6, 15, 9, 0, 0),  // purgeable
                backup_at(2024, 6, 15, 10, 0, 0), // retainable
                backup_at(2024, 6, 15, 11, 0, 0), // retainable
                backup_at(2024, 6, 15, 12, 0, 0), // newest - retainable
            ];

            let plan = strategy.plan(&backups);

            assert_eq!(plan.purgeable.len(), 2);
            assert_eq!(plan.retainable.len(), 3);

            // The oldest two should be purgeable
            assert_eq!(plan.purgeable[0].creation_date.hour(), 8);
            assert_eq!(plan.purgeable[1].creation_date.hour(), 9);

            // The newest three should be retainable
            assert_eq!(plan.retainable[0].creation_date.hour(), 10);
            assert_eq!(plan.retainable[1].creation_date.hour(), 11);
            assert_eq!(plan.retainable[2].creation_date.hour(), 12);
        }

        #[test]
        fn statuses_preserve_original_order() {
            let strategy = Strategy::most_recent(2);
            let backups = vec![
                backup_at(2024, 6, 15, 8, 0, 0),
                backup_at(2024, 6, 15, 9, 0, 0),
                backup_at(2024, 6, 15, 10, 0, 0),
                backup_at(2024, 6, 15, 11, 0, 0),
            ];

            let plan = strategy.plan(&backups);

            // Statuses should be in the same order as input
            assert_eq!(plan.statuses.len(), 4);
            assert!(matches!(plan.statuses[0].1, BackupStatus::Purgeable));
            assert!(matches!(plan.statuses[1].1, BackupStatus::Purgeable));
            assert!(matches!(plan.statuses[2].1, BackupStatus::Retainable));
            assert!(matches!(plan.statuses[3].1, BackupStatus::Retainable));
        }

        #[test]
        fn k_equals_count_keeps_all() {
            let strategy = Strategy::most_recent(3);
            let backups = vec![
                backup_at(2024, 6, 15, 8, 0, 0),
                backup_at(2024, 6, 15, 9, 0, 0),
                backup_at(2024, 6, 15, 10, 0, 0),
            ];

            let plan = strategy.plan(&backups);

            assert!(plan.purgeable.is_empty());
            assert_eq!(plan.retainable.len(), 3);
        }

        #[test]
        fn k_zero_purges_all() {
            let strategy = Strategy::most_recent(0);
            let backups = vec![
                backup_at(2024, 6, 15, 8, 0, 0),
                backup_at(2024, 6, 15, 9, 0, 0),
            ];

            let plan = strategy.plan(&backups);

            assert_eq!(plan.purgeable.len(), 2);
            assert!(plan.retainable.is_empty());
        }

        #[test]
        fn handles_large_catalog() {
            let strategy = Strategy::most_recent(10);
            let backups = generate_hourly_backups(100);

            let plan = strategy.plan(&backups);

            assert_eq!(plan.purgeable.len(), 90);
            assert_eq!(plan.retainable.len(), 10);

            // Verify the retained ones are the most recent
            for retained in &plan.retainable {
                // The last 10 backups (indices 90-99)
                assert!(backups[90..].contains(retained));
            }
        }
    }

    // Note: The Classic strategy reads the current local time through `plan()`, so the exact
    // partition of a catalog depends on when the tests run. Only properties which hold
    // whatever the current time are checked here.
    mod classic_strategy {
        use super::*;

        #[test]
        fn empty_catalog_produces_empty_plan() {
            let backups: Vec<Backup> = vec![];

            let plan = Strategy::Classic.plan(&backups);

            assert!(plan.purgeable.is_empty());
            assert!(plan.retainable.is_empty());
            assert!(plan.statuses.is_empty());
        }

        #[test]
        fn backups_older_than_a_year_are_all_purgeable() {
            // The year 2000 is outside of every retention window, as long as the system
            // clock is not set before 2001.
            let backups = vec![
                backup_at(2000, 1, 1, 8, 0, 0),
                backup_at(2000, 1, 2, 8, 0, 0),
                backup_at(2000, 2, 1, 8, 0, 0),
            ];

            let plan = Strategy::Classic.plan(&backups);

            assert_eq!(plan.purgeable.len(), 3);
            assert!(plan.retainable.is_empty());
            assert!(plan
                .statuses
                .iter()
                .all(|(_, status)| matches!(status, BackupStatus::Purgeable)));
        }

        #[test]
        fn every_backup_gets_exactly_one_status() {
            let backups = generate_hourly_backups(48);

            let plan = Strategy::Classic.plan(&backups);

            assert_eq!(plan.statuses.len(), backups.len());
            assert_eq!(
                plan.purgeable.len() + plan.retainable.len(),
                backups.len()
            );
        }
    }

    mod strategy_display {
        use super::*;

        #[test]
        fn keep_most_recent_shows_count() {
            let strategy = Strategy::most_recent(42);
            assert_eq!(format!("{strategy}"), "KeepMostRecent: 42");
        }

        #[test]
        fn classic_shows_name() {
            let strategy = Strategy::Classic;
            assert_eq!(format!("{strategy}"), "Classic");
        }
    }

    mod strategy_constructors {
        use super::*;

        #[test]
        fn most_recent_stores_k() {
            let strategy = Strategy::most_recent(7);
            assert!(matches!(strategy, Strategy::KeepMostRecent { k: 7 }));
        }
    }
}