backedup/
lib.rs

1#[macro_use]
2extern crate lazy_static;
3
4use std::{fmt, io};
5use std::cmp::Ordering;
6use std::collections::{BTreeMap, BTreeSet, HashMap};
7use std::fmt::{Display, Formatter};
8use std::fs::{read_dir, remove_file};
9use std::hash::Hash;
10use std::path::{Path, PathBuf};
11
12use log::{error, info};
13use regex::Regex;
14use termion::{color, style};
15use termion::color::Fg;
16use thiserror::Error;
17use wildmatch::WildMatch;
18
/// Newtype around [`io::Error`], used as the `source` field of
/// [`BackedUpError::ReadDirError`].
///
/// `Display` and `PartialEq` are implemented by hand for this wrapper rather than derived.
// NOTE(review): presumably the wrapper exists because `io::Error` has no `PartialEq`
// impl while `BackedUpError` derives `PartialEq` — confirm.
#[derive(Debug, Error)]
pub struct IoError(io::Error);
21
22impl Display for IoError {
23    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
24        self.0.fmt(f)
25    }
26}
27
28impl PartialEq for IoError {
29    fn eq(&self, other: &Self) -> bool {
30        self == other
31    }
32}
33
/// Errors returned by the public constructors of this crate.
#[derive(Error, Debug, PartialEq)]
pub enum BackedUpError {
    /// The directory at `path` could not be opened for listing; `source` wraps the
    /// underlying I/O error. Produced by `Plan::new`.
    #[error("Couldn't open directory {path}")]
    ReadDirError { source: IoError, path: PathBuf },
    /// Every slot count passed to `SlotConfig::new` was zero.
    #[error("At least one slot must be configured")]
    NoSlot,
    /// The custom regex handed to `Config::new` failed to compile.
    #[error("Invalid regex")]
    InvalidRegex(#[from] regex::Error),
    /// The regex compiled but lacks the named capture group `name`
    /// (`Config::new` checks for `year`, `month` and `day`).
    #[error("Regex missing capture group for \"{name}\". -- example: (?P<{name}>\\d{{2}})")]
    MissingCaptureGroup { name: String },
}
45
46#[derive(Copy, Clone)]
47pub struct SlotConfig {
48    yearly: usize,
49    monthly: usize,
50    daily: usize,
51    hourly: usize,
52    minutely: usize,
53}
54
55impl SlotConfig {
56    pub fn new(
57        years: usize,
58        months: usize,
59        days: usize,
60        hours: usize,
61        minutes: usize,
62    ) -> Result<Self, BackedUpError> {
63        if years + months + days + hours + minutes == 0 {
64            return Err(BackedUpError::NoSlot);
65        }
66        Ok(Self {
67            yearly: years,
68            monthly: months,
69            daily: days,
70            hourly: hours,
71            minutely: minutes,
72        })
73    }
74}
75
76pub struct Config {
77    slots: SlotConfig,
78    pattern: Vec<WildMatch>,
79    re: Regex,
80}
81
82impl Config {
83    /// [String] pattern(s) will filter entries with wildcard expressions - see [WildMatch] for details
84    /// An empty [Vec] implies no filter
85    ///
86    /// An optional regex [String] can be provided for parsing into timeslots.
87    /// At least `year`, `month` and `day` must be provided as named groups
88    pub fn new(
89        slot_config: SlotConfig,
90        pattern: &[String],
91        re_str: Option<&str>,
92    ) -> Result<Self, BackedUpError> {
93        let pattern = pattern.into_iter().map(|s| WildMatch::new(s)).collect();
94        let re = match re_str {
95            None => (*RE).clone(),
96            Some(s) => Regex::new(s).map_err(|e| BackedUpError::InvalidRegex(e))?,
97        };
98        let capture_names: Vec<_> = re.capture_names().flatten().collect();
99        for i in ["year", "month", "day"].iter() {
100            if !capture_names.contains(i) {
101                return Err(BackedUpError::MissingCaptureGroup {
102                    name: i.to_string(),
103                });
104            }
105        }
106        Ok(Self {
107            slots: slot_config,
108            pattern,
109            re,
110        })
111    }
112}
113
114#[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Hash)]
115struct BackupEntry<'a> {
116    year: u16,
117    month: u8,
118    day: u8,
119    hour: u8,
120    minute: u8,
121    path: &'a Path,
122}
123
124impl<'a> BackupEntry<'a> {
125    fn new(path: &'a Path, pattern: &[WildMatch], re: &Regex) -> Option<Self> {
126        let filename = path.file_name()?.to_str()?;
127        if !pattern.is_empty() && !pattern.iter().any(|w| w.matches(filename)) {
128            return None;
129        }
130        let m = re.captures(filename)?;
131        let year = m.name("year")?.as_str().parse().ok()?;
132        let month = m.name("month")?.as_str().parse().ok()?;
133        let day = m.name("day")?.as_str().parse().ok()?;
134        let hour = m
135            .name("hour")
136            .and_then(|s| s.as_str().parse().ok())
137            .unwrap_or(0);
138        let minute = m
139            .name("minute")
140            .and_then(|s| s.as_str().parse().ok())
141            .unwrap_or(0);
142        Some(Self {
143            year,
144            month,
145            day,
146            hour,
147            minute,
148            path,
149        })
150    }
151
152    fn get_ordering_tuple(&self) -> (u16, u8, u8, u8, u8) {
153        (self.year, self.month, self.day, self.hour, self.minute)
154    }
155}
156
157impl<'a> Ord for BackupEntry<'a> {
158    fn cmp(&self, other: &Self) -> Ordering {
159        self.get_ordering_tuple().cmp(&other.get_ordering_tuple())
160    }
161}
162
/// Granularity of a retention slot.
#[derive(Copy, Clone, Debug)]
pub enum Period {
    Years,
    Months,
    Days,
    Hours,
    Minutes,
}

impl Period {
    /// Label for this period as a static string (used when rendering a plan).
    fn to_string(&self) -> &'static str {
        match *self {
            Self::Years => "Years",
            Self::Months => "Months",
            Self::Days => "Days",
            Self::Hours => "Hours",
            Self::Minutes => "Minutes",
        }
    }
}
183
// Default timestamp regex; `Config::new` clones it when no custom regex is supplied.
//
// The pattern is written with the `(?x)` flag, so whitespace and the `#` comment
// inside the literal are not part of the match. It expects a 4-digit `year`, a
// 2-digit `month` and a 2-digit `day`, each optionally followed by one non-digit
// separator. A trailing time-of-day group is optional as a whole; inside it `hour`
// and `minute` are required and `second` is optional.
//
// `BackupEntry::new` reads `year`, `month`, `day`, `hour` and `minute`; the `second`
// group is captured but not read there.
lazy_static! {
    static ref RE: Regex = Regex::new(
        r"(?x)(?P<year>\d{4}) \D?
(?P<month>\d{2}) \D?
(?P<day>\d{2}) \D?
(
   # Optional components.
   (?P<hour>\d{2}) \D?
   (?P<minute>\d{2}) \D?
   (?P<second>\d{2})?
)?"
    )
    .unwrap();
}
198
199/// Plan for keeping/removing [`PathBuf`] with configured slots.
200///
201/// [`PathBuf`] that are invalid strings aren't considered for either retention or deletion.
202pub struct Plan {
203    pub to_keep: Vec<PathBuf>,
204    pub to_remove: Vec<PathBuf>,
205    period_map: HashMap<PathBuf, Vec<Period>>,
206}
207
208impl Display for Plan {
209    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
210        writeln!(f, "Plan to:\n")?;
211        if self.to_keep.is_empty() && self.to_remove.is_empty() {
212            writeln!(f, "\tDo nothing: no valid timestamps")?;
213            return Ok(());
214        }
215        writeln!(
216            f,
217            "\t{}Keep {} file(s) matching {}period(s)",
218            Fg(color::Green),
219            &self.to_keep.len(),
220            style::Reset
221        )?;
222        for i in &self.to_keep {
223            write!(
224                f,
225                "\t\t{}{} {}",
226                Fg(color::Green),
227                i.to_str().unwrap(),
228                style::Reset
229            )?;
230            let periods = self.period_map.get(i).unwrap();
231            let periods: Vec<_> = periods.iter().map(|x| x.to_string()).collect();
232            writeln!(f, "-> ({})", periods.join(","))?;
233        }
234        writeln!(f, "")?;
235        writeln!(
236            f,
237            "\t{}Remove {} file(s) not matching periods",
238            Fg(color::Red),
239            &self.to_remove.len()
240        )?;
241        for i in &self.to_remove {
242            writeln!(f, "\t\t{}", i.to_str().unwrap())?;
243        }
244        Ok(())
245    }
246}
247
248impl Plan {
249    pub fn new<P: AsRef<Path>>(config: &Config, path: P) -> Result<Self, BackedUpError> {
250        let dir = read_dir(&path).map_err(|e| BackedUpError::ReadDirError {
251            source: IoError(e),
252            path: path.as_ref().to_path_buf(),
253        })?;
254        let entries: Vec<_> = dir.flatten().map(|x| x.path()).collect();
255        Ok(Self::from(&config, &entries))
256    }
257
258    fn from(config: &Config, entries: &[PathBuf]) -> Self {
259        let entries: BTreeSet<_> = entries
260            .into_iter()
261            .filter_map(|x| BackupEntry::new(x, &config.pattern, &config.re))
262            .collect();
263        let mut year_slots = BTreeMap::new();
264        let mut month_slots = BTreeMap::new();
265        let mut day_slots = BTreeMap::new();
266        let mut hour_slots = BTreeMap::new();
267        let mut minute_slots = BTreeMap::new();
268        for entry in entries.iter().rev() {
269            year_slots.insert(entry.year, entry);
270            month_slots.insert((entry.year, entry.month), entry);
271            day_slots.insert((entry.year, entry.month, entry.day), entry);
272            hour_slots.insert((entry.year, entry.month, entry.day, entry.hour), entry);
273            minute_slots.insert(
274                (entry.year, entry.month, entry.day, entry.hour, entry.minute),
275                entry,
276            );
277        }
278
279        let mut to_keep = BTreeSet::new();
280        let mut period_map: HashMap<PathBuf, Vec<Period>> = HashMap::new();
281        let SlotConfig {
282            yearly,
283            monthly,
284            daily,
285            hourly,
286            minutely,
287        } = config.slots;
288        for (_, entry) in year_slots.into_iter().rev().take(yearly) {
289            to_keep.insert(entry.clone());
290            period_map
291                .entry(entry.path.to_path_buf())
292                .or_default()
293                .push(Period::Years);
294        }
295        for (_, entry) in month_slots.into_iter().rev().take(monthly) {
296            to_keep.insert(entry.clone());
297            period_map
298                .entry(entry.path.to_path_buf())
299                .or_default()
300                .push(Period::Months);
301        }
302        for (_, entry) in day_slots.into_iter().rev().take(daily) {
303            to_keep.insert(entry.clone());
304            period_map
305                .entry(entry.path.to_path_buf())
306                .or_default()
307                .push(Period::Days);
308        }
309        for (_, entry) in hour_slots.into_iter().rev().take(hourly) {
310            to_keep.insert(entry.clone());
311            period_map
312                .entry(entry.path.to_path_buf())
313                .or_default()
314                .push(Period::Hours);
315        }
316        for (_, entry) in minute_slots.into_iter().rev().take(minutely) {
317            to_keep.insert(entry.clone());
318            period_map
319                .entry(entry.path.to_path_buf())
320                .or_default()
321                .push(Period::Minutes);
322        }
323
324        let to_remove: Vec<_> = entries
325            .difference(&to_keep)
326            .map(|x| x.path.to_path_buf())
327            .collect();
328        let to_keep: Vec<_> = to_keep.into_iter().map(|x| x.path.to_path_buf()).collect();
329        assert_eq!(entries.len(), &to_keep.len() + &to_remove.len());
330        Self {
331            to_keep,
332            to_remove,
333            period_map,
334        }
335    }
336
337    /// Execute plan and remove timestamped files not matching any slots
338    pub fn execute(&self) {
339        if self.to_remove.is_empty() {
340            info!("No file to remove")
341        }
342        for p in self.to_remove.iter() {
343            let filename = p.to_str().unwrap();
344            match remove_file(p) {
345                Ok(_) => {
346                    info!("removed file {}", filename)
347                }
348                Err(e) => {
349                    error!("failed to remove file \"{}\": {}", filename, e)
350                }
351            }
352        }
353    }
354}
355
356#[cfg(test)]
357mod tests {
358    use std::ffi::OsString;
359
360    use chrono::{DateTime, Duration, TimeZone, Utc};
361
362    use super::*;
363
364    fn create_test_data(
365        fmt: &str,
366        mut start_dt: DateTime<Utc>,
367        days: usize,
368        extension: &str,
369    ) -> Vec<PathBuf> {
370        let mut result = Vec::new();
371        let fmt = format!("{}{}", fmt, extension);
372        for _ in 0..days {
373            let path = PathBuf::from(start_dt.format(&fmt).to_string());
374            result.push(path);
375            start_dt = start_dt - Duration::days(1);
376        }
377        result
378    }
379
380    #[test]
381    fn test_make_plan() {
382        let fmt = "%Y-%m-%d";
383        let mut parsed_backups =
384            create_test_data(fmt, Utc.ymd(2015, 1, 1).and_hms(0, 0, 0), 400, "");
385
386        // no effect for number of matches until changing pattern
387        parsed_backups.append(&mut create_test_data(
388            fmt,
389            Utc.ymd(2015, 1, 1).and_hms(0, 0, 0),
390            30,
391            ".log",
392        ));
393        let slot_config = SlotConfig::new(3, 0, 0, 0, 0).unwrap();
394        let mut config = Config::new(slot_config, &vec![], None).unwrap();
395
396        let plan = Plan::from(&config, &parsed_backups);
397        assert_eq!(plan.to_keep.len(), 3);
398
399        config.slots.monthly = 13;
400        let plan = Plan::from(&config, &parsed_backups);
401        assert_eq!(plan.to_keep.len(), 14);
402
403        config.slots.daily = 30;
404        let plan = Plan::from(&config, &parsed_backups);
405        assert_eq!(plan.to_keep.len(), 43);
406
407        config.pattern = vec![WildMatch::new("*.log")];
408        let plan = Plan::from(&config, &parsed_backups);
409        assert_eq!(plan.to_keep.len(), 30);
410    }
411
412    #[test]
413    fn test_custom_regex() {
414        let fmt = "%y%m%d";
415        let parsed_backups = create_test_data(fmt, Utc.ymd(2015, 1, 1).and_hms(0, 0, 0), 400, "");
416        let slot_config = SlotConfig::new(3, 13, 30, 0, 0).unwrap();
417        let re_str = r"(?P<year>\d{2})(?P<month>\d{2})(?P<day>\d{2})";
418        let config = Config::new(slot_config, &vec![], Some(re_str)).unwrap();
419        let plan = Plan::from(&config, &parsed_backups);
420        assert_eq!(plan.to_keep.len(), 43);
421    }
422
423    #[test]
424    fn test_no_slot() {
425        let slot_config = SlotConfig::new(0, 0, 0, 0, 0);
426        assert_eq!(BackedUpError::NoSlot, slot_config.err().unwrap());
427    }
428
429    #[test]
430    fn test_missing_named_group() {
431        let slot_config = SlotConfig::new(1, 0, 0, 0, 0).unwrap();
432        let re_str = r"(?P<month>\d{2})(?P<day>\d{2})";
433
434        let config = Config::new(slot_config, &vec![], Some(re_str));
435        assert_eq!(
436            BackedUpError::MissingCaptureGroup {
437                name: "year".to_string()
438            },
439            config.err().unwrap()
440        );
441
442        let re_str = r"(?P<year>\d{2})(?P<day>\d{2})";
443        let config = Config::new(slot_config, &vec![], Some(re_str));
444        assert_eq!(
445            BackedUpError::MissingCaptureGroup {
446                name: "month".to_string()
447            },
448            config.err().unwrap()
449        );
450
451        let re_str = r"(?P<year>\d{2})(?P<month>\d{2})";
452        let config = Config::new(slot_config, &vec![], Some(re_str));
453        assert_eq!(
454            BackedUpError::MissingCaptureGroup {
455                name: "day".to_string()
456            },
457            config.err().unwrap()
458        );
459    }
460
461    #[test]
462    fn test_invalid_regex() {
463        let re_str = r"/(notaregex";
464        let slot_config = SlotConfig::new(1, 0, 0, 0, 0).unwrap();
465        let config = Config::new(slot_config, &vec![], Some(re_str));
466        assert!(matches!(
467            config.err().unwrap(),
468            BackedUpError::InvalidRegex(_)
469        ))
470    }
471
472    #[cfg(target_family = "unix")]
473    #[test]
474    fn test_invalid_utf_entry() {
475        use std::os::unix::ffi::OsStringExt;
476        let invalid_utf = b"2021-04-11\xe7";
477        let path = PathBuf::from(OsString::from_vec(invalid_utf.to_vec()));
478        let entry = BackupEntry::new(&path, &vec![], &RE);
479        assert_eq!(entry, None);
480    }
481}