Skip to main content

rustic_rs/
filtering.rs

1#[cfg(feature = "rhai")]
2use crate::error::RhaiErrorKinds;
3
4#[cfg(feature = "rhai")]
5use std::error::Error;
6use std::{
7    fmt::{Debug, Display},
8    str::FromStr,
9};
10
11#[cfg(feature = "jq")]
12use anyhow::{anyhow, bail};
13use bytesize::ByteSize;
14use derive_more::derive::Display;
15use jiff::{Zoned, civil::Time, tz::TimeZone};
16use log::warn;
17use rustic_core::{
18    StringList,
19    repofile::{RusticTime, SnapshotFile},
20};
21
22use cached::proc_macro::cached;
23use conflate::Merge;
24
25#[cfg(feature = "jq")]
26use jaq_core::{
27    Compiler, Ctx, Filter, Native, RcIter,
28    load::{Arena, File, Loader},
29};
30#[cfg(feature = "jq")]
31use jaq_json::Val;
32#[cfg(feature = "rhai")]
33use rhai::{AST, Dynamic, Engine, FnPtr, serde::to_dynamic};
34use serde::{Deserialize, Serialize};
35#[cfg(feature = "jq")]
36use serde_json::Value;
37use serde_with::{DisplayFromStr, serde_as};
38
/// A snapshot predicate written as a Rhai script.
///
/// Holds the [`FnPtr`] obtained by evaluating the script together with the
/// compiled [`AST`] it came from. The function is called with a
/// [`SnapshotFile`] and must return a boolean.
#[cfg(feature = "rhai")]
#[derive(Clone, Debug)]
pub(crate) struct SnapshotFn(FnPtr, AST);

#[cfg(feature = "rhai")]
impl FromStr for SnapshotFn {
    type Err = RhaiErrorKinds;

    /// Compile `s` as a Rhai script and evaluate it to a function pointer.
    ///
    /// # Errors
    ///
    /// Fails if the script does not compile or if evaluating it does not
    /// produce a [`FnPtr`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let engine = Engine::new();
        let script = engine.compile(s)?;
        let predicate: FnPtr = engine.eval_ast(&script)?;
        Ok(Self(predicate, script))
    }
}
56
#[cfg(feature = "rhai")]
impl SnapshotFn {
    /// Invoke the stored Rhai function on a [`SnapshotFile`].
    ///
    /// The snapshot is converted into a Rhai [`Dynamic`] value and passed as
    /// the single argument of the function; the result is returned as `T`.
    /// When used for filtering, `T` is `bool`.
    ///
    /// # Errors
    ///
    /// Returns an error if the snapshot cannot be converted into a
    /// [`Dynamic`] value or if the function call itself fails.
    fn call<T: Clone + Send + Sync + 'static>(
        &self,
        sn: &SnapshotFile,
    ) -> Result<T, Box<dyn Error>> {
        let Self(func, ast) = self;
        let arg: Dynamic = to_dynamic(sn)?;
        // A fresh engine is created for every call; only the AST is kept around.
        let engine = Engine::new();
        let outcome = func.call::<T>(&engine, ast, (arg,))?;
        Ok(outcome)
    }
}
75
/// Turn a Rhai script into a [`SnapshotFn`], logging a warning on failure.
///
/// Returns `None` when the script cannot be parsed into a [`SnapshotFn`].
/// The `cached` attribute memoizes the result, keyed on the script text,
/// with a cache size of 1.
#[cfg(feature = "rhai")]
#[cached(key = "String", convert = r#"{ s.to_string() }"#, size = 1)]
fn string_to_fn(s: &str) -> Option<SnapshotFn> {
    s.parse::<SnapshotFn>()
        .inspect_err(|err| warn!("Error evaluating filter-fn {s}: {err}"))
        .ok()
}
87
/// A compiled jq filter that is run against snapshots.
#[cfg(feature = "jq")]
#[derive(Clone)]
pub(crate) struct SnapshotJq(Filter<Native<Val>>);

#[cfg(feature = "jq")]
impl FromStr for SnapshotJq {
    type Err = anyhow::Error;

    /// Load and compile the jq program `s`, using the definitions and
    /// functions provided by `jaq_std` and `jaq_json`.
    ///
    /// # Errors
    ///
    /// Fails if the program cannot be loaded or cannot be compiled.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let arena = Arena::default();
        let loader = Loader::new(jaq_std::defs().chain(jaq_json::defs()));

        let modules = match loader.load(&arena, File { code: s, path: () }) {
            Ok(modules) => modules,
            Err(errs) => bail!("errors loading modules in jq: {errs:?}"),
        };

        let compiler =
            Compiler::<_, Native<_>>::default().with_funs(jaq_std::funs().chain(jaq_json::funs()));
        match compiler.compile(modules) {
            Ok(filter) => Ok(Self(filter)),
            Err(errs) => bail!("errors during compiling filters in jq: {errs:?}"),
        }
    }
}
110
#[cfg(feature = "jq")]
impl SnapshotJq {
    /// Run the jq filter on a [`SnapshotFile`] and interpret its first output
    /// as a boolean.
    ///
    /// The snapshot is serialized to JSON and used as the filter input. Only
    /// the first value produced by the filter is considered.
    ///
    /// # Errors
    ///
    /// Returns an error if
    /// * the snapshot cannot be serialized to JSON,
    /// * the filter fails while being evaluated,
    /// * the filter produces no output at all, or
    /// * the first output is not a boolean.
    fn call(&self, snap: &SnapshotFile) -> Result<bool, anyhow::Error> {
        let input = serde_json::to_value(snap)?;

        // The filter gets no additional inputs besides the snapshot itself.
        let inputs = RcIter::new(core::iter::empty());
        let first = self.0.run((Ctx::new([], &inputs), Val::from(input))).next();

        match first {
            Some(Ok(val)) => {
                let val: Value = val.into();
                val.as_bool()
                    .ok_or_else(|| anyhow!("expression does not return bool"))
            }
            // Report evaluation failures as such instead of claiming a type mismatch.
            Some(Err(err)) => bail!("error evaluating expression: {err}"),
            None => bail!("expression does not return any value"),
        }
    }
}
132
/// Turn a jq program into a [`SnapshotJq`], logging a warning on failure.
///
/// Returns `None` when the program cannot be parsed into a [`SnapshotJq`].
/// The `cached` attribute memoizes the result, keyed on the program text,
/// with a cache size of 1.
#[cfg(feature = "jq")]
#[cached(key = "String", convert = r#"{ s.to_string() }"#, size = 1)]
fn string_to_jq(s: &str) -> Option<SnapshotJq> {
    match SnapshotJq::from_str(s) {
        Ok(filter_jq) => Some(filter_jq),
        Err(err) => {
            // Name the jq option here so the warning is not confused with the rhai `filter-fn`.
            warn!("Error evaluating filter-jq {s}: {err}");
            None
        }
    }
}
144
145#[serde_as]
146#[derive(Clone, Default, Debug, Serialize, Deserialize, Merge, clap::Parser)]
147#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
148pub struct SnapshotFilter {
149    /// Hostname to filter (can be specified multiple times)
150    #[clap(long = "filter-host", global = true, value_name = "HOSTNAME")]
151    #[merge(strategy=conflate::vec::overwrite_empty)]
152    filter_hosts: Vec<String>,
153
154    /// Label to filter (can be specified multiple times)
155    #[clap(long = "filter-label", global = true, value_name = "LABEL")]
156    #[merge(strategy=conflate::vec::overwrite_empty)]
157    filter_labels: Vec<String>,
158
159    /// Path list to filter (can be specified multiple times)
160    #[clap(long, global = true, value_name = "PATH[,PATH,..]")]
161    #[serde_as(as = "Vec<DisplayFromStr>")]
162    #[merge(strategy=conflate::vec::overwrite_empty)]
163    filter_paths: Vec<StringList>,
164
165    /// Path list to filter exactly (no superset) as given (can be specified multiple times)
166    #[clap(long, global = true, value_name = "PATH[,PATH,..]")]
167    #[serde_as(as = "Vec<DisplayFromStr>")]
168    #[merge(strategy=conflate::vec::overwrite_empty)]
169    filter_paths_exact: Vec<StringList>,
170
171    /// Tag list to filter (can be specified multiple times)
172    #[clap(long, global = true, value_name = "TAG[,TAG,..]")]
173    #[serde_as(as = "Vec<DisplayFromStr>")]
174    #[merge(strategy=conflate::vec::overwrite_empty)]
175    filter_tags: Vec<StringList>,
176
177    /// Tag list to filter exactly (no superset) as given (can be specified multiple times)
178    #[clap(long, global = true, value_name = "TAG[,TAG,..]")]
179    #[serde_as(as = "Vec<DisplayFromStr>")]
180    #[merge(strategy=conflate::vec::overwrite_empty)]
181    filter_tags_exact: Vec<StringList>,
182
183    /// Only use snapshots which are taken after the given given date/time
184    #[serde_as(as = "Option<DisplayFromStr>")]
185    #[clap(long, global = true, value_name = "DATE(TIME)")]
186    #[merge(strategy=conflate::option::overwrite_none)]
187    filter_after: Option<AfterDate>,
188
189    /// Only use snapshots which are taken before the given given date/time
190    #[serde_as(as = "Option<DisplayFromStr>")]
191    #[clap(long, global = true, value_name = "DATE(TIME)")]
192    #[merge(strategy=conflate::option::overwrite_none)]
193    filter_before: Option<BeforeDate>,
194
195    /// Only use snapshots with total size in given range
196    #[serde_as(as = "Option<DisplayFromStr>")]
197    #[clap(long, global = true, value_name = "SIZE")]
198    #[merge(strategy=conflate::option::overwrite_none)]
199    filter_size: Option<SizeRange>,
200
201    /// Only use snapshots with size added to the repo in given range
202    #[serde_as(as = "Option<DisplayFromStr>")]
203    #[clap(long, global = true, value_name = "SIZE")]
204    #[merge(strategy=conflate::option::overwrite_none)]
205    filter_size_added: Option<SizeRange>,
206
207    /// Only use the last COUNT snapshots for each group
208    #[serde_as(as = "Option<DisplayFromStr>")]
209    #[clap(long, global = true, value_name = "COUNT")]
210    #[merge(strategy=conflate::option::overwrite_none)]
211    filter_last: Option<usize>,
212
213    /// Function to filter snapshots
214    #[cfg(feature = "rhai")]
215    #[clap(long, global = true, value_name = "FUNC")]
216    #[serde_as(as = "Option<DisplayFromStr>")]
217    #[merge(strategy=conflate::option::overwrite_none)]
218    filter_fn: Option<String>,
219
220    /// jq to filter snapshots
221    #[cfg(feature = "jq")]
222    #[clap(long, global = true, value_name = "JQ")]
223    #[serde_as(as = "Option<DisplayFromStr>")]
224    #[merge(strategy=conflate::option::overwrite_none)]
225    filter_jq: Option<String>,
226}
227
228impl SnapshotFilter {
229    /// Check if a [`SnapshotFile`] matches the filter
230    ///
231    /// # Arguments
232    ///
233    /// * `snapshot` - The snapshot to check
234    ///
235    /// # Returns
236    ///
237    /// `true` if the snapshot matches the filter, `false` otherwise
238    #[must_use]
239    pub fn matches(&self, snapshot: &SnapshotFile) -> bool {
240        #[cfg(feature = "rhai")]
241        if let Some(filter_fn) = &self.filter_fn
242            && let Some(func) = string_to_fn(filter_fn)
243        {
244            match func.call::<bool>(snapshot) {
245                Ok(result) => {
246                    if !result {
247                        return false;
248                    }
249                }
250                Err(err) => {
251                    warn!(
252                        "Error evaluating filter-fn for snapshot {}: {err}",
253                        snapshot.id
254                    );
255                    return false;
256                }
257            }
258        }
259        #[cfg(feature = "jq")]
260        if let Some(filter_jq) = &self.filter_jq
261            && let Some(jq) = string_to_jq(filter_jq)
262        {
263            match jq.call(snapshot) {
264                Ok(result) => {
265                    if !result {
266                        return false;
267                    }
268                }
269                Err(err) => {
270                    warn!(
271                        "Error evaluating filter-jq for snapshot {}: {err}",
272                        snapshot.id
273                    );
274                    return false;
275                }
276            }
277        }
278
279        // For the `Option`s we check if the option is set and the condition is not matched. In this case we can early return false.
280        if matches!(&self.filter_after, Some(after) if !after.matches(&snapshot.time))
281            || matches!(&self.filter_before, Some(before) if !before.matches(&snapshot.time))
282            || matches!((&self.filter_size,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.total_bytes_processed))
283            || matches!((&self.filter_size_added,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.data_added))
284        {
285            return false;
286        }
287
288        // For the the `Vec`s we have two possibilities:
289        // - There exists a suitable matches method on the snapshot item
290        //   (this automatically handles empty filter correctly):
291        snapshot.paths.matches(&self.filter_paths)
292            && snapshot.tags.matches(&self.filter_tags)
293        //  - manually check if the snapshot item is contained in the `Vec`
294        //    but only if the `Vec` is not empty.
295        //    If it is empty, no condition is given.
296            && (self.filter_paths_exact.is_empty()
297                || self.filter_paths_exact.contains(&snapshot.paths))
298            && (self.filter_tags_exact.is_empty()
299                || self.filter_tags_exact.contains(&snapshot.tags))
300            && (self.filter_hosts.is_empty() || self.filter_hosts.contains(&snapshot.hostname))
301            && (self.filter_labels.is_empty() || self.filter_labels.contains(&snapshot.label))
302    }
303
304    pub fn post_process(&self, snapshots: &mut Vec<SnapshotFile>) {
305        snapshots.sort_unstable();
306        if let Some(last) = self.filter_last {
307            let count = snapshots.len();
308            if last < count {
309                let new = snapshots.split_off(count - last);
310                let _ = std::mem::replace(snapshots, new);
311            }
312        }
313    }
314}
315
316#[derive(Debug, Clone, Display)]
317struct AfterDate(Zoned);
318
319impl AfterDate {
320    fn matches(&self, datetime: &Zoned) -> bool {
321        &self.0 < datetime
322    }
323}
324
325impl FromStr for AfterDate {
326    type Err = anyhow::Error;
327    fn from_str(s: &str) -> Result<Self, Self::Err> {
328        Ok(Self(RusticTime::parse(s, Time::MAX, TimeZone::system())?))
329    }
330}
331
332#[derive(Debug, Clone, Display)]
333struct BeforeDate(Zoned);
334
335impl BeforeDate {
336    fn matches(&self, datetime: &Zoned) -> bool {
337        datetime < self.0
338    }
339}
340
341impl FromStr for BeforeDate {
342    type Err = anyhow::Error;
343    fn from_str(s: &str) -> Result<Self, Self::Err> {
344        Ok(Self(RusticTime::parse_system(s)?))
345    }
346}
347
348#[derive(Debug, Clone)]
349struct SizeRange {
350    from: Option<ByteSize>,
351    to: Option<ByteSize>,
352}
353
354impl SizeRange {
355    fn matches(&self, size: u64) -> bool {
356        // The matches-expression is only true if the `Option` is `Some` and the size is smaller than from.
357        // Hence, !matches is true either if `self.from` is `None` or if the size >= the values
358        !matches!(self.from, Some(from) if size < from.0)
359        // same logic here, but smaller and greater swapped.
360            && !matches!(self.to, Some(to) if size > to.0)
361    }
362}
363
364fn parse_size(s: &str) -> Result<Option<ByteSize>, String> {
365    let s = s.trim();
366    if s.is_empty() {
367        return Ok(None);
368    }
369    Ok(Some(s.parse()?))
370}
371
372impl FromStr for SizeRange {
373    type Err = String;
374    fn from_str(s: &str) -> Result<Self, Self::Err> {
375        let (from, to) = match s.split_once("..") {
376            Some((s1, s2)) => (parse_size(s1)?, parse_size(s2)?),
377            None => (parse_size(s)?, None),
378        };
379        Ok(Self { from, to })
380    }
381}
382
383impl Display for SizeRange {
384    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
385        if let Some(from) = self.from {
386            Display::fmt(&from.display(), f)?;
387        }
388        f.write_str("..")?;
389        if let Some(to) = self.to {
390            Display::fmt(&to.display(), f)?;
391        }
392
393        Ok(())
394    }
395}
396
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Each case gives an input string, which is converted into a
    /// [`SizeRange`], and the lower and upper bound in bytes expected after
    /// parsing.
    #[rstest]
    #[case("..", None, None)]
    #[case("10", Some(10), None)]
    #[case("..10k", None, Some(10_000))]
    #[case("1MB..", Some(1_000_000), None)]
    #[case("1 MB .. 1 GiB", Some(1_000_000), Some(1_073_741_824))]
    #[case("10 .. 20 ", Some(10), Some(20))]
    #[case(" 2G ", Some(2_000_000_000), None)]
    fn size_range_from_str(
        #[case] input: SizeRange,
        #[case] from: Option<u64>,
        #[case] to: Option<u64>,
    ) {
        let SizeRange {
            from: lower,
            to: upper,
        } = input;
        assert_eq!(lower.map(|size| size.0), from);
        assert_eq!(upper.map(|size| size.0), to);
    }
}
418}