rustic_rs/
filtering.rs

1#[cfg(feature = "rhai")]
2use crate::error::RhaiErrorKinds;
3
4#[cfg(feature = "rhai")]
5use std::error::Error;
6use std::{
7    fmt::{Debug, Display},
8    str::FromStr,
9};
10
11#[cfg(feature = "jq")]
12use anyhow::{anyhow, bail};
13use bytesize::ByteSize;
14use derive_more::derive::Display;
15use log::warn;
16use rustic_core::{StringList, repofile::SnapshotFile};
17
18use cached::proc_macro::cached;
19use chrono::{DateTime, Local, NaiveTime};
20use conflate::Merge;
21
22#[cfg(feature = "jq")]
23use jaq_core::{
24    Compiler, Ctx, Filter, Native, RcIter,
25    load::{Arena, File, Loader},
26};
27#[cfg(feature = "jq")]
28use jaq_json::Val;
29#[cfg(feature = "rhai")]
30use rhai::{AST, Dynamic, Engine, FnPtr, serde::to_dynamic};
31use serde::{Deserialize, Serialize};
32#[cfg(feature = "jq")]
33use serde_json::Value;
34use serde_with::{DisplayFromStr, serde_as};
35
/// A user-supplied rhai function used to filter snapshots.
///
/// The first field is the function pointer the script evaluated to, the second
/// field is the compiled script ([`AST`]) which is needed again when the
/// function is called. The function is called with a [`SnapshotFile`] and must
/// return a boolean.
#[cfg(feature = "rhai")]
#[derive(Clone, Debug)]
pub(crate) struct SnapshotFn(FnPtr, AST);

#[cfg(feature = "rhai")]
impl FromStr for SnapshotFn {
    type Err = RhaiErrorKinds;

    /// Compile the rhai script `s` and evaluate it to a function pointer.
    ///
    /// # Errors
    ///
    /// Fails if the script cannot be compiled or if evaluating it does not
    /// yield a function pointer.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let engine = Engine::new();
        let script = engine.compile(s)?;
        let fn_ptr: FnPtr = engine.eval_ast(&script)?;
        Ok(Self(fn_ptr, script))
    }
}
53
#[cfg(feature = "rhai")]
impl SnapshotFn {
    /// Call the function with a [`SnapshotFile`]
    ///
    /// The snapshot is converted into a rhai [`Dynamic`] and passed as the only
    /// argument. For filtering, the function must return a boolean.
    ///
    /// # Errors
    ///
    /// * If the snapshot cannot be converted into a rhai [`Dynamic`].
    /// * If calling the function fails.
    fn call<T: Clone + Send + Sync + 'static>(
        &self,
        sn: &SnapshotFile,
    ) -> Result<T, Box<dyn Error>> {
        let Self(func, ast) = self;
        // A fresh engine is created for every call.
        let engine = Engine::new();
        let arg: Dynamic = to_dynamic(sn)?;
        let outcome = func.call::<T>(&engine, ast, (arg,))?;
        Ok(outcome)
    }
}
72
/// Parse a rhai filter function from its source string.
///
/// Returns `None` (after logging a warning) if the string cannot be turned into
/// a [`SnapshotFn`]. The result is memoized by `cached` with a cache size of 1,
/// keyed by the source string.
#[cfg(feature = "rhai")]
#[cached(key = "String", convert = r#"{ s.to_string() }"#, size = 1)]
fn string_to_fn(s: &str) -> Option<SnapshotFn> {
    let parsed = s.parse::<SnapshotFn>();
    if let Err(err) = &parsed {
        warn!("Error evaluating filter-fn {s}: {err}");
    }
    parsed.ok()
}
84
/// A compiled jq filter used to filter snapshots.
#[cfg(feature = "jq")]
#[derive(Clone)]
pub(crate) struct SnapshotJq(Filter<Native<Val>>);

#[cfg(feature = "jq")]
impl FromStr for SnapshotJq {
    type Err = anyhow::Error;

    /// Load and compile the jq program `s`, with the `jaq_std` and `jaq_json`
    /// definitions and functions available.
    ///
    /// # Errors
    ///
    /// Fails if the program cannot be loaded or compiled; the jaq errors are
    /// included in the message in their debug representation.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let arena = Arena::default();
        let loader = Loader::new(jaq_std::defs().chain(jaq_json::defs()));

        let modules = match loader.load(&arena, File { code: s, path: () }) {
            Ok(modules) => modules,
            Err(errs) => return Err(anyhow!("errors loading modules in jq: {errs:?}")),
        };

        let compiler = Compiler::<_, Native<_>>::default()
            .with_funs(jaq_std::funs().chain(jaq_json::funs()));

        match compiler.compile(modules) {
            Ok(filter) => Ok(Self(filter)),
            Err(errs) => Err(anyhow!("errors during compiling filters in jq: {errs:?}")),
        }
    }
}
107
#[cfg(feature = "jq")]
impl SnapshotJq {
    /// Run the jq filter on a [`SnapshotFile`] and interpret the result as a boolean.
    ///
    /// The snapshot is serialized to JSON and used as the input of the filter.
    /// Only the first output value of the filter is considered.
    ///
    /// # Errors
    ///
    /// * If the snapshot cannot be serialized to JSON.
    /// * If the filter fails while being evaluated.
    /// * If the filter produces no output at all.
    /// * If the first output value is not a boolean.
    fn call(&self, snap: &SnapshotFile) -> Result<bool, anyhow::Error> {
        let input = serde_json::to_value(snap)?;

        // The filter gets no additional inputs besides the snapshot itself.
        let inputs = RcIter::new(core::iter::empty());
        let res = self.0.run((Ctx::new([], &inputs), Val::from(input))).next();

        match res {
            Some(Ok(val)) => {
                let val: Value = val.into();
                val.as_bool()
                    .ok_or_else(|| anyhow!("expression does not return bool"))
            }
            // Report the actual evaluation error instead of hiding it behind a
            // generic "does not return bool" message.
            Some(Err(err)) => bail!("error evaluating expression: {err:?}"),
            None => bail!("expression does not return any value"),
        }
    }
}
129
/// Parse a jq filter from its source string.
///
/// Returns `None` (after logging a warning) if the string cannot be turned into
/// a [`SnapshotJq`]. The result is memoized by `cached` with a cache size of 1,
/// keyed by the source string.
#[cfg(feature = "jq")]
#[cached(key = "String", convert = r#"{ s.to_string() }"#, size = 1)]
fn string_to_jq(s: &str) -> Option<SnapshotJq> {
    match SnapshotJq::from_str(s) {
        Ok(filter_jq) => Some(filter_jq),
        Err(err) => {
            // This is the jq filter, so name the `filter-jq` option in the warning.
            warn!("Error evaluating filter-jq {s}: {err}",);
            None
        }
    }
}
141
142#[serde_as]
143#[derive(Clone, Default, Debug, Serialize, Deserialize, Merge, clap::Parser)]
144#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
145pub struct SnapshotFilter {
146    /// Hostname to filter (can be specified multiple times)
147    #[clap(long = "filter-host", global = true, value_name = "HOSTNAME")]
148    #[merge(strategy=conflate::vec::overwrite_empty)]
149    filter_hosts: Vec<String>,
150
151    /// Label to filter (can be specified multiple times)
152    #[clap(long = "filter-label", global = true, value_name = "LABEL")]
153    #[merge(strategy=conflate::vec::overwrite_empty)]
154    filter_labels: Vec<String>,
155
156    /// Path list to filter (can be specified multiple times)
157    #[clap(long, global = true, value_name = "PATH[,PATH,..]")]
158    #[serde_as(as = "Vec<DisplayFromStr>")]
159    #[merge(strategy=conflate::vec::overwrite_empty)]
160    filter_paths: Vec<StringList>,
161
162    /// Path list to filter exactly (no superset) as given (can be specified multiple times)
163    #[clap(long, global = true, value_name = "PATH[,PATH,..]")]
164    #[serde_as(as = "Vec<DisplayFromStr>")]
165    #[merge(strategy=conflate::vec::overwrite_empty)]
166    filter_paths_exact: Vec<StringList>,
167
168    /// Tag list to filter (can be specified multiple times)
169    #[clap(long, global = true, value_name = "TAG[,TAG,..]")]
170    #[serde_as(as = "Vec<DisplayFromStr>")]
171    #[merge(strategy=conflate::vec::overwrite_empty)]
172    filter_tags: Vec<StringList>,
173
174    /// Tag list to filter exactly (no superset) as given (can be specified multiple times)
175    #[clap(long, global = true, value_name = "TAG[,TAG,..]")]
176    #[serde_as(as = "Vec<DisplayFromStr>")]
177    #[merge(strategy=conflate::vec::overwrite_empty)]
178    filter_tags_exact: Vec<StringList>,
179
180    /// Only use snapshots which are taken after the given given date/time
181    #[serde_as(as = "Option<DisplayFromStr>")]
182    #[clap(long, global = true, value_name = "DATE(TIME)")]
183    #[merge(strategy=conflate::option::overwrite_none)]
184    filter_after: Option<AfterDate>,
185
186    /// Only use snapshots which are taken before the given given date/time
187    #[serde_as(as = "Option<DisplayFromStr>")]
188    #[clap(long, global = true, value_name = "DATE(TIME)")]
189    #[merge(strategy=conflate::option::overwrite_none)]
190    filter_before: Option<BeforeDate>,
191
192    /// Only use snapshots with total size in given range
193    #[serde_as(as = "Option<DisplayFromStr>")]
194    #[clap(long, global = true, value_name = "SIZE")]
195    #[merge(strategy=conflate::option::overwrite_none)]
196    filter_size: Option<SizeRange>,
197
198    /// Only use snapshots with size added to the repo in given range
199    #[serde_as(as = "Option<DisplayFromStr>")]
200    #[clap(long, global = true, value_name = "SIZE")]
201    #[merge(strategy=conflate::option::overwrite_none)]
202    filter_size_added: Option<SizeRange>,
203
204    /// Only use the last COUNT snapshots for each group
205    #[serde_as(as = "Option<DisplayFromStr>")]
206    #[clap(long, global = true, value_name = "COUNT")]
207    #[merge(strategy=conflate::option::overwrite_none)]
208    filter_last: Option<usize>,
209
210    /// Function to filter snapshots
211    #[cfg(feature = "rhai")]
212    #[clap(long, global = true, value_name = "FUNC")]
213    #[serde_as(as = "Option<DisplayFromStr>")]
214    #[merge(strategy=conflate::option::overwrite_none)]
215    filter_fn: Option<String>,
216
217    /// jq to filter snapshots
218    #[cfg(feature = "jq")]
219    #[clap(long, global = true, value_name = "JQ")]
220    #[serde_as(as = "Option<DisplayFromStr>")]
221    #[merge(strategy=conflate::option::overwrite_none)]
222    filter_jq: Option<String>,
223}
224
225impl SnapshotFilter {
226    /// Check if a [`SnapshotFile`] matches the filter
227    ///
228    /// # Arguments
229    ///
230    /// * `snapshot` - The snapshot to check
231    ///
232    /// # Returns
233    ///
234    /// `true` if the snapshot matches the filter, `false` otherwise
235    #[must_use]
236    pub fn matches(&self, snapshot: &SnapshotFile) -> bool {
237        #[cfg(feature = "rhai")]
238        if let Some(filter_fn) = &self.filter_fn {
239            if let Some(func) = string_to_fn(filter_fn) {
240                match func.call::<bool>(snapshot) {
241                    Ok(result) => {
242                        if !result {
243                            return false;
244                        }
245                    }
246                    Err(err) => {
247                        warn!(
248                            "Error evaluating filter-fn for snapshot {}: {err}",
249                            snapshot.id
250                        );
251                        return false;
252                    }
253                }
254            }
255        }
256        #[cfg(feature = "jq")]
257        if let Some(filter_jq) = &self.filter_jq {
258            if let Some(jq) = string_to_jq(filter_jq) {
259                match jq.call(snapshot) {
260                    Ok(result) => {
261                        if !result {
262                            return false;
263                        }
264                    }
265                    Err(err) => {
266                        warn!(
267                            "Error evaluating filter-jq for snapshot {}: {err}",
268                            snapshot.id
269                        );
270                        return false;
271                    }
272                }
273            }
274        }
275
276        // For the `Option`s we check if the option is set and the condition is not matched. In this case we can early return false.
277        if matches!(&self.filter_after, Some(after) if !after.matches(snapshot.time))
278            || matches!(&self.filter_before, Some(before) if !before.matches(snapshot.time))
279            || matches!((&self.filter_size,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.total_bytes_processed))
280            || matches!((&self.filter_size_added,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.data_added))
281        {
282            return false;
283        }
284
285        // For the the `Vec`s we have two possibilities:
286        // - There exists a suitable matches method on the snapshot item
287        //   (this automatically handles empty filter correctly):
288        snapshot.paths.matches(&self.filter_paths)
289            && snapshot.tags.matches(&self.filter_tags)
290        //  - manually check if the snapshot item is contained in the `Vec`
291        //    but only if the `Vec` is not empty.
292        //    If it is empty, no condition is given.
293            && (self.filter_paths_exact.is_empty()
294                || self.filter_paths_exact.contains(&snapshot.paths))
295            && (self.filter_tags_exact.is_empty()
296                || self.filter_tags_exact.contains(&snapshot.tags))
297            && (self.filter_hosts.is_empty() || self.filter_hosts.contains(&snapshot.hostname))
298            && (self.filter_labels.is_empty() || self.filter_labels.contains(&snapshot.label))
299    }
300
301    pub fn post_process(&self, snapshots: &mut Vec<SnapshotFile>) {
302        snapshots.sort_unstable();
303        if let Some(last) = self.filter_last {
304            let count = snapshots.len();
305            if last < count {
306                let new = snapshots.split_off(count - last);
307                let _ = std::mem::replace(snapshots, new);
308            }
309        }
310    }
311}
312
313#[derive(Debug, Clone, Display)]
314struct AfterDate(DateTime<Local>);
315
316impl AfterDate {
317    fn matches(&self, datetime: DateTime<Local>) -> bool {
318        self.0 < datetime
319    }
320}
321
322impl FromStr for AfterDate {
323    type Err = anyhow::Error;
324    fn from_str(s: &str) -> Result<Self, Self::Err> {
325        let before_midnight = NaiveTime::from_hms_nano_opt(23, 59, 59, 999_999_999).unwrap();
326        let datetime = dateparser::parse_with(s, &Local, before_midnight)?;
327        Ok(Self(datetime.into()))
328    }
329}
330
331#[derive(Debug, Clone, Display)]
332struct BeforeDate(DateTime<Local>);
333
334impl BeforeDate {
335    fn matches(&self, datetime: DateTime<Local>) -> bool {
336        datetime < self.0
337    }
338}
339
340impl FromStr for BeforeDate {
341    type Err = anyhow::Error;
342    fn from_str(s: &str) -> Result<Self, Self::Err> {
343        let midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
344        let datetime = dateparser::parse_with(s, &Local, midnight)?;
345        Ok(Self(datetime.into()))
346    }
347}
348
349#[derive(Debug, Clone)]
350struct SizeRange {
351    from: Option<ByteSize>,
352    to: Option<ByteSize>,
353}
354
355impl SizeRange {
356    fn matches(&self, size: u64) -> bool {
357        // The matches-expression is only true if the `Option` is `Some` and the size is smaller than from.
358        // Hence, !matches is true either if `self.from` is `None` or if the size >= the values
359        !matches!(self.from, Some(from) if size < from.0)
360        // same logic here, but smaller and greater swapped.
361            && !matches!(self.to, Some(to) if size > to.0)
362    }
363}
364
365fn parse_size(s: &str) -> Result<Option<ByteSize>, String> {
366    let s = s.trim();
367    if s.is_empty() {
368        return Ok(None);
369    }
370    Ok(Some(s.parse()?))
371}
372
373impl FromStr for SizeRange {
374    type Err = String;
375    fn from_str(s: &str) -> Result<Self, Self::Err> {
376        let (from, to) = match s.split_once("..") {
377            Some((s1, s2)) => (parse_size(s1)?, parse_size(s2)?),
378            None => (parse_size(s)?, None),
379        };
380        Ok(Self { from, to })
381    }
382}
383
384impl Display for SizeRange {
385    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386        if let Some(from) = self.from {
387            Display::fmt(&from.display(), f)?;
388        }
389        f.write_str("..")?;
390        if let Some(to) = self.to {
391            Display::fmt(&to.display(), f)?;
392        }
393
394        Ok(())
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Parsing a [`SizeRange`] from a string yields the expected ends in bytes.
    ///
    /// `input` is given as a string literal in each case and converted by rstest.
    #[rstest]
    #[case("..", None, None)]
    #[case("10", Some(10), None)]
    #[case("..10k", None, Some(10_000))]
    #[case("1MB..", Some(1_000_000), None)]
    #[case("1 MB .. 1 GiB", Some(1_000_000), Some(1_073_741_824))]
    #[case("10 .. 20 ", Some(10), Some(20))]
    #[case(" 2G ", Some(2_000_000_000), None)]
    fn size_range_from_str(
        #[case] input: SizeRange,
        #[case] from: Option<u64>,
        #[case] to: Option<u64>,
    ) {
        let SizeRange {
            from: lower,
            to: upper,
        } = input;

        assert_eq!(lower.map(|size| size.0), from);
        assert_eq!(upper.map(|size| size.0), to);
    }
}
419}