1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::convert::AsRef;
use std::fs::File;
use std::hash::Hasher;
use std::io::Read;
use std::iter::Iterator;

use mailparse::*;
use regex::Regex;
use serde::{Deserialize, Serialize};

use crate::error::Error::*;
use crate::error::*;

use crate::Operations;
use crate::Value;
use crate::Value::*;

use notmuch::{Database, Message, Query, Threads};

/// A set of rules together with the operations that are applied to every
/// message matching at least one of those rules.
///
/// Unknown fields are rejected during deserialization.
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct Filter {
    /// Explicitly set name
    ///
    /// When this is `None`, [`Filter::name`] falls back to a hash of
    /// [`Filter::rules`].
    ///
    /// [`Filter::name`]: struct.Filter.html#method.name
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    name: Option<String>,
    /// Description
    ///
    /// Not really used for anything at this point in time, but may be good for
    /// users to remember what this specific filter is doing
    pub desc: Option<String>,
    /// List of rules
    ///
    /// This list is an OR list, meaning the filter will match if any rule
    /// matches. However, AND combinations may happen within a rule
    // at the moment, since we are generating a hash in the name function this
    // field needs to be consistent in the order it prints its key/value pairs
    pub rules: Vec<BTreeMap<String, Value>>,
    /// Operations that will be applied if any rule matches
    pub op: Operations,
    /// Compiled regular expressions, one map per entry in [`Filter::rules`]
    ///
    /// Never (de)serialized; filled in by [`Filter::compile`].
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    /// [`Filter::compile`]: struct.Filter.html#method.compile
    #[serde(skip)]
    re: Vec<HashMap<String, Vec<Regex>>>,
}

impl Filter {
    /// Creates an empty filter: no name, no description, no rules and
    /// default operations.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns either the set name, or a hash of [`Filter::rules`]. Please
    /// note: hashed names are not used for serialization.
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    pub fn name(&self) -> String {
        if let Some(name) = &self.name {
            return name.clone();
        }
        // The debug representation of the rules is what gets hashed, which
        // is why `rules` has to print its key/value pairs in a consistent
        // order.
        let mut hasher = DefaultHasher::new();
        hasher.write(format!("{:?}", self.rules).as_bytes());
        format!("{:x}", hasher.finish())
    }

    /// Sets an explicit name, which takes precedence over the hashed name
    /// otherwise returned by [`Filter::name`].
    ///
    /// [`Filter::name`]: struct.Filter.html#method.name
    pub fn set_name(&mut self, name: &str) {
        self.name = Some(name.to_owned());
    }

    /// When filters are deserialized from json or have been assembled via code,
    /// the regular expressions contained in [`Filter::rules`] need to be
    /// compiled before any matches are to be made.
    ///
    /// Compiling replaces any previously compiled expressions, so calling
    /// this on an already compiled filter is harmless.
    ///
    /// # Errors
    ///
    /// Fails if a pattern is not a valid regular expression, or with
    /// `UnsupportedValue` if a rule value is neither a single pattern nor a
    /// list of patterns.
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    pub fn compile(mut self) -> Result<Self> {
        // Build into a fresh vector instead of appending to `self.re`:
        // appending would leave `re` longer than `rules` on a second call
        // and `is_match` would then refuse to run.
        let mut compiled_rules = Vec::with_capacity(self.rules.len());
        for rule in &self.rules {
            let mut compiled = HashMap::with_capacity(rule.len());
            for (key, value) in rule {
                let mut res = Vec::new();
                match value {
                    Single(re) => res.push(Regex::new(re)?),
                    Multiple(mre) => {
                        for re in mre {
                            res.push(Regex::new(re)?);
                        }
                    }
                    _ => {
                        let e = "Not a regular expression".to_string();
                        return Err(UnsupportedValue(e));
                    }
                }
                compiled.insert(key.to_string(), res);
            }
            compiled_rules.push(compiled);
        }
        self.re = compiled_rules;
        Ok(self)
    }

    /// Combines [`Filter::is_match`] and [`Operations::apply`]
    ///
    /// Returns a tuple of two bools, the first representing if the filter has
    /// been applied, the second if the operation deleted the message that was
    /// supplied
    ///
    /// # Errors
    ///
    /// Propagates any error of [`Filter::is_match`] or [`Operations::apply`].
    ///
    /// [`Filter::is_match`]: struct.Filter.html#method.is_match
    /// [`Operations::apply`]: struct.Operations.html#method.apply
    pub fn apply_if_match(&self, msg: &Message, db: &Database) -> Result<(bool, bool)> {
        if !self.is_match(msg, db)? {
            return Ok((false, false));
        }
        let deleted = self.op.apply(msg, db, &self.name())?;
        Ok((true, deleted))
    }

    /// Checks if the supplied message matches any of the combinations described
    /// in [`Filter::rules`]
    ///
    /// Within one rule every key has to match. Keys are interpreted as
    /// follows:
    ///
    /// * `@path`, `@tags`, `@thread-tags`, `@attachment`, `@attachment-body`
    ///   and `@body` match if *any* of their values matches *any* of the
    ///   regular expressions given for that key
    /// * any other key starting with `@` is ignored
    /// * every other key is taken as a header name; the header has to exist
    ///   and *all* regular expressions given for it have to match
    ///
    /// # Errors
    ///
    /// Returns `RegexUncompiled` if [`Filter::compile`] has not been run,
    /// `NotmuchError` if a header can not be read, and propagates errors
    /// from querying the database and from reading or parsing the message
    /// file.
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    /// [`Filter::compile`]: struct.Filter.html#method.compile
    pub fn is_match(&self, msg: &Message, db: &Database) -> Result<bool> {
        /// Test if any of the supplied values match any of our supplied regular
        /// expressions.
        fn sub_match<I, S>(res: &[Regex], mut values: I) -> bool
        where
            S: AsRef<str>,
            I: Iterator<Item = S>,
        {
            values.any(|value| res.iter().any(|re| re.is_match(value.as_ref())))
        }

        // self.re will only be populated after self.compile()
        if self.re.len() != self.rules.len() {
            let e = "Filters need to be compiled before tested".to_string();
            return Err(RegexUncompiled(e));
        }

        for rule in &self.re {
            let mut is_match = true;
            for (part, res) in rule {
                // Only used for `@thread-tags`. Declared out here rather
                // than inside the branch so that both outlive everything
                // that is borrowed from them in there.
                let q: Query;
                let mut r: Threads;
                match part.as_str() {
                    "@path" => {
                        // XXX we might want to return an error here if we can't
                        // make the path to a valid utf-8 str? Or maybe go for
                        // to_str_lossy?
                        let vs = msg
                            .filenames()
                            .filter_map(|f| f.to_str().map(|n| n.to_string()));
                        is_match = is_match && sub_match(res, vs);
                    }
                    "@tags" => {
                        is_match = is_match && sub_match(res, msg.tags());
                    }
                    "@thread-tags" => {
                        // creating a new query as we don't have information about
                        // our own thread yet
                        q = db.create_query(&format!("thread:{}", msg.thread_id()))?;
                        r = q.search_threads()?;
                        if let Some(thread) = r.next() {
                            is_match = is_match && sub_match(res, thread.tags());
                        } else {
                            // No thread means no thread tags to match on,
                            // just like a message without tags never
                            // satisfies `@tags`.
                            is_match = false;
                        }
                    }
                    "@attachment" | "@attachment-body" | "@body" => {
                        // These three share reading and parsing of the
                        // message file. Note that the file is still parsed
                        // once for every one of these keys in a rule.
                        let mut buf = Vec::new();
                        // XXX-file notmuch says it returns a random filename if
                        // multiple are present. Question is if the new tag is even
                        // applied to messages we've already seen, do we ever run
                        // into that being a problem at all?
                        let mut file = File::open(msg.filename())?;
                        file.read_to_end(&mut buf)?;
                        let parsed = parse_mail(&buf)?;
                        match part.as_str() {
                            "@attachment" => {
                                // file names of all subparts that carry one
                                let fns = parsed.subparts.iter().filter_map(
                                    |s| s.get_content_disposition().params.get("filename").cloned(),
                                );
                                is_match = is_match && sub_match(res, fns);
                            }
                            "@body" => {
                                // decode first so that a decoding error is
                                // reported no matter what `is_match` is
                                let body = parsed.get_body()?;
                                is_match = is_match && sub_match(res, std::iter::once(body));
                            }
                            _ => {
                                // "@attachment-body"
                                //
                                // XXX are we sure we only care about text
                                // mime types? There others?
                                let bodys = parsed
                                    .subparts
                                    .iter()
                                    .filter(|s| s.ctype.mimetype.starts_with("text"))
                                    .map(|s| -> Result<String> { Ok(s.get_body()?) })
                                    .collect::<Result<Vec<String>>>()?;
                                is_match = is_match && sub_match(res, bodys.into_iter());
                            }
                        }
                    }
                    // Any other `@` key is not a header and is skipped
                    // without influencing the result.
                    other if other.starts_with('@') => {}
                    _ => match msg.header(part) {
                        Ok(None) => {
                            is_match = false;
                        }
                        Ok(Some(p)) => {
                            // headers need to satisfy every expression
                            is_match = is_match && res.iter().all(|re| re.is_match(&p));
                        }
                        Err(e) => return Err(NotmuchError(e)),
                    },
                }
            }
            if is_match {
                return Ok(true);
            }
        }
        Ok(false)
    }
}