// notcoal/filter.rs

use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::convert::AsRef;
use std::fs::File;
use std::hash::Hasher;
use std::io::Read;
use std::iter;
use std::iter::Iterator;

use mailparse::*;
use notmuch::{Database, Message, Query, Threads};
use regex::Regex;
use serde::{Deserialize, Serialize};

use crate::error::Error::*;
use crate::error::*;
use crate::Operations;
use crate::Value;
use crate::Value::*;
22
/// A mail filter: a list of rules plus the operations to apply once any of
/// those rules matches a message.
///
/// Unknown fields are rejected during deserialization
/// (`deny_unknown_fields`).
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct Filter {
    /// Optional explicit name
    ///
    /// When unset, [`Filter::name`] falls back to a hash of [`Filter::rules`]
    name: Option<String>,
    /// Description
    ///
    /// Not really used for anything at this point in time, but may be good for
    /// users to remember what this specific filter is doing
    pub desc: Option<String>,
    /// List of rules
    ///
    /// This list is an OR list, meaning the filter will match if any rule
    /// matches. However, AND combinations may happen within a rule
    // at the moment, since we are generating a hash in the name function this
    // field needs to be consistent in the order it prints its key/value pairs
    pub rules: Vec<BTreeMap<String, Value>>,
    /// Operations that will be applied if any rule matches
    pub op: Operations,
    /// Compiled regular expressions, one map per entry in `rules`
    ///
    /// Never (de)serialized; populated by `Filter::compile` and required by
    /// `Filter::is_match`
    #[serde(skip)]
    re: Vec<HashMap<String, Vec<Regex>>>,
}
44
45impl Filter {
46    pub fn new() -> Self {
47        Default::default()
48    }
49
50    /// Returns either the set name, or a hash of [`Filter::rules`]. Please
51    /// note: hashed names are not used for serialization.
52    ///
53    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
54    pub fn name(&self) -> String {
55        match &self.name {
56            Some(name) => name.clone(),
57            None => {
58                // XXX This seems dumb, there has to be a better way
59                let mut h = DefaultHasher::new();
60                let buf = format!("{:?}", self.rules);
61                for byte in buf.as_bytes() {
62                    h.write_u8(*byte);
63                }
64                format!("{:x}", h.finish())
65            }
66        }
67    }
68
69    pub fn set_name(&mut self, name: &str) {
70        self.name = Some(name.to_string());
71    }
72
73    /// When filters are deserialized from json or have been assembled via code,
74    /// the regular expressions contained in [`Filter::rules`] need to be
75    /// compiled before any matches are to be made.
76    ///
77    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
78    pub fn compile(mut self) -> Result<Self> {
79        for rule in &self.rules {
80            let mut compiled = HashMap::new();
81            for (key, value) in rule.iter() {
82                let mut res = Vec::new();
83                match value {
84                    Single(re) => res.push(Regex::new(re)?),
85                    Multiple(mre) => {
86                        for re in mre {
87                            res.push(Regex::new(re)?);
88                        }
89                    }
90                    _ => {
91                        let e = "Not a regular expression".to_string();
92                        return Err(UnsupportedValue(e));
93                    }
94                }
95                compiled.insert(key.to_string(), res);
96            }
97            self.re.push(compiled);
98        }
99        Ok(self)
100    }
101
102    /// Combines [`Filter::is_match`] and [`Operations::apply`]
103    ///
104    /// Returns a tuple of two bools, the first representing if the filter has
105    /// been applied, the second if the operation deleted the message that was
106    /// supplied
107    ///
108    /// [`Filter::is_match`]: struct.Filter.html#method.is_match
109    /// [`Operations::apply`]: struct.Operations.html#method.apply
110    pub fn apply_if_match(&self, msg: &Message, db: &Database) -> Result<(bool, bool)> {
111        if self.is_match(msg, db)? {
112            Ok((true, self.op.apply(msg, db, &self.name())?))
113        } else {
114            Ok((false, false))
115        }
116    }
117
118    /// Checks if the supplied message matches any of the combinations described
119    /// in [`Filter::rules`]
120    ///
121    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
122    pub fn is_match(&self, msg: &Message, db: &Database) -> Result<bool> {
123        /// Test if any of the supplied values match any of our supplied regular
124        /// expressions.
125        fn sub_match<I, S>(res: &[Regex], values: I) -> bool
126        where
127            S: AsRef<str>,
128            I: Iterator<Item = S>,
129        {
130            for value in values {
131                for re in res {
132                    if re.is_match(value.as_ref()) {
133                        return true;
134                    }
135                }
136            }
137            false
138        }
139
140        // self.re will only be populated after self.compile()
141        if self.re.len() != self.rules.len() {
142            let e = "Filters need to be compiled before tested".to_string();
143            return Err(RegexUncompiled(e));
144        }
145
146        for rule in &self.re {
147            let mut is_match = true;
148            for (part, res) in rule {
149                let q: Query;
150                let mut r: Threads;
151                if part == "@path" {
152                    // XXX we might want to return an error here if we can't
153                    // make the path to a valid utf-8 str? Or maybe go for
154                    // to_str_lossy?
155                    let vs = msg
156                        .filenames()
157                        .filter_map(|f| f.to_str().map(|n| n.to_string()));
158                    is_match = sub_match(res, vs) && is_match;
159                } else if part == "@tags" {
160                    is_match = sub_match(res, msg.tags()) && is_match;
161                } else if part == "@thread-tags" {
162                    // creating a new query as we don't have information about
163                    // our own thread yet
164                    q = db.create_query(&format!("thread:{}", msg.thread_id()))?;
165                    r = q.search_threads()?;
166                    if let Some(thread) = r.next() {
167                        is_match = sub_match(res, thread.tags()) && is_match;
168                    }
169                } else if part == "@attachment" || part == "@attachment-body" || part == "@body" {
170                    // since we might combine these we try avoid parsing the
171                    // same file over and over again.
172                    let mut buf = Vec::new();
173                    // XXX-file notmuch says it returns a random filename if
174                    // multiple are present. Question is if the new tag is even
175                    // applied to messages we've already seen, do we ever run
176                    // into that being a problem at all?
177                    let mut file = File::open(msg.filename())?;
178                    file.read_to_end(&mut buf)?;
179                    let parsed = parse_mail(&buf)?;
180                    if part == "@attachment" {
181                        // XXX Check if this can be refactored with less cloning
182                        let fns = parsed
183                            .subparts
184                            .iter()
185                            .map(|s| s.get_content_disposition().params.get("filename").cloned())
186                            .collect::<Vec<Option<String>>>();
187                        let fns = fns.iter().filter_map(|f| f.clone());
188                        is_match = sub_match(res, fns) && is_match;
189                    } else if part == "@body" {
190                        is_match = sub_match(res, [parsed.get_body()?].iter()) && is_match;
191                    } else if part == "@attachment-body" {
192                        let bodys = parsed
193                            .subparts
194                            .iter()
195                            .map(|s| {
196                                // XXX are we sure we only care about text
197                                // mime types? There others?
198                                if s.ctype.mimetype.starts_with("text") {
199                                    Ok(Some(s.get_body()?))
200                                } else {
201                                    Ok(None)
202                                }
203                            })
204                            .collect::<Result<Vec<Option<String>>>>()?;
205                        let bodys = bodys.iter().filter_map(|f| f.clone());
206                        is_match = sub_match(res, bodys) && is_match;
207                    }
208                }
209                if part.starts_with('@') {
210                    continue;
211                }
212
213                match msg.header(part) {
214                    Ok(None) => {
215                        is_match = false;
216                    }
217                    Ok(Some(p)) => {
218                        for re in res {
219                            is_match = re.is_match(&p) && is_match;
220                            if !is_match {
221                                break;
222                            }
223                        }
224                    }
225                    Err(e) => return Err(NotmuchError(e)),
226                }
227            }
228            if is_match {
229                return Ok(true);
230            }
231        }
232        Ok(false)
233    }
234}