notcoal/filter.rs
1use std::collections::hash_map::DefaultHasher;
2use std::collections::BTreeMap;
3use std::collections::HashMap;
4use std::convert::AsRef;
5use std::fs::File;
6use std::hash::Hasher;
7use std::io::Read;
8use std::iter::Iterator;
9
10use mailparse::*;
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13
14use crate::error::Error::*;
15use crate::error::*;
16
17use crate::Operations;
18use crate::Value;
19use crate::Value::*;
20
21use notmuch::{Database, Message, Query, Threads};
22
23#[derive(Debug, Serialize, Deserialize, Default)]
24#[serde(deny_unknown_fields)]
25pub struct Filter {
26 name: Option<String>,
27 /// Description
28 ///
29 /// Not really used for anything at this point in time, but may be good for
30 /// users to remember what this specific filter is doing
31 pub desc: Option<String>,
32 /// List of rules
33 ///
34 /// This list is an OR list, meaning the filter will match if any rule
35 /// matches. However, AND combinations may happen within a rule
36 // at the moment, since we are generating a hash in the name function this
37 // field needs to be consistent in the order it prints its key/value pairs
38 pub rules: Vec<BTreeMap<String, Value>>,
39 /// Operations that will be applied if this any rule matches
40 pub op: Operations,
41 #[serde(skip)]
42 re: Vec<HashMap<String, Vec<Regex>>>,
43}
44
45impl Filter {
46 pub fn new() -> Self {
47 Default::default()
48 }
49
50 /// Returns either the set name, or a hash of [`Filter::rules`]. Please
51 /// note: hashed names are not used for serialization.
52 ///
53 /// [`Filter::rules`]: struct.Filter.html#structfield.rules
54 pub fn name(&self) -> String {
55 match &self.name {
56 Some(name) => name.clone(),
57 None => {
58 // XXX This seems dumb, there has to be a better way
59 let mut h = DefaultHasher::new();
60 let buf = format!("{:?}", self.rules);
61 for byte in buf.as_bytes() {
62 h.write_u8(*byte);
63 }
64 format!("{:x}", h.finish())
65 }
66 }
67 }
68
69 pub fn set_name(&mut self, name: &str) {
70 self.name = Some(name.to_string());
71 }
72
73 /// When filters are deserialized from json or have been assembled via code,
74 /// the regular expressions contained in [`Filter::rules`] need to be
75 /// compiled before any matches are to be made.
76 ///
77 /// [`Filter::rules`]: struct.Filter.html#structfield.rules
78 pub fn compile(mut self) -> Result<Self> {
79 for rule in &self.rules {
80 let mut compiled = HashMap::new();
81 for (key, value) in rule.iter() {
82 let mut res = Vec::new();
83 match value {
84 Single(re) => res.push(Regex::new(re)?),
85 Multiple(mre) => {
86 for re in mre {
87 res.push(Regex::new(re)?);
88 }
89 }
90 _ => {
91 let e = "Not a regular expression".to_string();
92 return Err(UnsupportedValue(e));
93 }
94 }
95 compiled.insert(key.to_string(), res);
96 }
97 self.re.push(compiled);
98 }
99 Ok(self)
100 }
101
102 /// Combines [`Filter::is_match`] and [`Operations::apply`]
103 ///
104 /// Returns a tuple of two bools, the first representing if the filter has
105 /// been applied, the second if the operation deleted the message that was
106 /// supplied
107 ///
108 /// [`Filter::is_match`]: struct.Filter.html#method.is_match
109 /// [`Operations::apply`]: struct.Operations.html#method.apply
110 pub fn apply_if_match(&self, msg: &Message, db: &Database) -> Result<(bool, bool)> {
111 if self.is_match(msg, db)? {
112 Ok((true, self.op.apply(msg, db, &self.name())?))
113 } else {
114 Ok((false, false))
115 }
116 }
117
118 /// Checks if the supplied message matches any of the combinations described
119 /// in [`Filter::rules`]
120 ///
121 /// [`Filter::rules`]: struct.Filter.html#structfield.rules
122 pub fn is_match(&self, msg: &Message, db: &Database) -> Result<bool> {
123 /// Test if any of the supplied values match any of our supplied regular
124 /// expressions.
125 fn sub_match<I, S>(res: &[Regex], values: I) -> bool
126 where
127 S: AsRef<str>,
128 I: Iterator<Item = S>,
129 {
130 for value in values {
131 for re in res {
132 if re.is_match(value.as_ref()) {
133 return true;
134 }
135 }
136 }
137 false
138 }
139
140 // self.re will only be populated after self.compile()
141 if self.re.len() != self.rules.len() {
142 let e = "Filters need to be compiled before tested".to_string();
143 return Err(RegexUncompiled(e));
144 }
145
146 for rule in &self.re {
147 let mut is_match = true;
148 for (part, res) in rule {
149 let q: Query;
150 let mut r: Threads;
151 if part == "@path" {
152 // XXX we might want to return an error here if we can't
153 // make the path to a valid utf-8 str? Or maybe go for
154 // to_str_lossy?
155 let vs = msg
156 .filenames()
157 .filter_map(|f| f.to_str().map(|n| n.to_string()));
158 is_match = sub_match(res, vs) && is_match;
159 } else if part == "@tags" {
160 is_match = sub_match(res, msg.tags()) && is_match;
161 } else if part == "@thread-tags" {
162 // creating a new query as we don't have information about
163 // our own thread yet
164 q = db.create_query(&format!("thread:{}", msg.thread_id()))?;
165 r = q.search_threads()?;
166 if let Some(thread) = r.next() {
167 is_match = sub_match(res, thread.tags()) && is_match;
168 }
169 } else if part == "@attachment" || part == "@attachment-body" || part == "@body" {
170 // since we might combine these we try avoid parsing the
171 // same file over and over again.
172 let mut buf = Vec::new();
173 // XXX-file notmuch says it returns a random filename if
174 // multiple are present. Question is if the new tag is even
175 // applied to messages we've already seen, do we ever run
176 // into that being a problem at all?
177 let mut file = File::open(msg.filename())?;
178 file.read_to_end(&mut buf)?;
179 let parsed = parse_mail(&buf)?;
180 if part == "@attachment" {
181 // XXX Check if this can be refactored with less cloning
182 let fns = parsed
183 .subparts
184 .iter()
185 .map(|s| s.get_content_disposition().params.get("filename").cloned())
186 .collect::<Vec<Option<String>>>();
187 let fns = fns.iter().filter_map(|f| f.clone());
188 is_match = sub_match(res, fns) && is_match;
189 } else if part == "@body" {
190 is_match = sub_match(res, [parsed.get_body()?].iter()) && is_match;
191 } else if part == "@attachment-body" {
192 let bodys = parsed
193 .subparts
194 .iter()
195 .map(|s| {
196 // XXX are we sure we only care about text
197 // mime types? There others?
198 if s.ctype.mimetype.starts_with("text") {
199 Ok(Some(s.get_body()?))
200 } else {
201 Ok(None)
202 }
203 })
204 .collect::<Result<Vec<Option<String>>>>()?;
205 let bodys = bodys.iter().filter_map(|f| f.clone());
206 is_match = sub_match(res, bodys) && is_match;
207 }
208 }
209 if part.starts_with('@') {
210 continue;
211 }
212
213 match msg.header(part) {
214 Ok(None) => {
215 is_match = false;
216 }
217 Ok(Some(p)) => {
218 for re in res {
219 is_match = re.is_match(&p) && is_match;
220 if !is_match {
221 break;
222 }
223 }
224 }
225 Err(e) => return Err(NotmuchError(e)),
226 }
227 }
228 if is_match {
229 return Ok(true);
230 }
231 }
232 Ok(false)
233 }
234}