// Filter definitions: rule compilation and matching of notmuch messages.
use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::convert::AsRef;
use std::fs::File;
use std::hash::Hasher;
use std::io::Read;
use std::iter::Iterator;
use mailparse::*;
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::error::Error::*;
use crate::error::*;
use crate::Operations;
use crate::Value;
use crate::Value::*;
use notmuch::{Database, Message, Query, Threads};
/// A set of matching rules together with the operations that are applied to
/// messages satisfying them.
///
/// Unknown keys are rejected during deserialization (`deny_unknown_fields`).
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct Filter {
/// Optional explicit name
///
/// When unset, the `name` method falls back to a hash of the `rules` field.
name: Option<String>,
/// Description
///
/// Not really used for anything at this point in time, but may be good for
/// users to remember what this specific filter is doing
pub desc: Option<String>,
/// List of rules
///
/// This list is an OR list, meaning the filter will match if any rule
/// matches. However, AND combinations may happen within a rule
// At the moment, since we are generating a hash in the name function, this
// field needs to be consistent in the order it prints its key/value pairs
// (hence a BTreeMap rather than a HashMap).
pub rules: Vec<BTreeMap<String, Value>>,
/// Operations that will be applied if any rule matches
pub op: Operations,
/// Compiled regular expressions, one map per entry in `rules`
///
/// Filled in by the `compile` method and never (de)serialized.
#[serde(skip)]
re: Vec<HashMap<String, Vec<Regex>>>,
}
impl Filter {
    /// Creates an empty, unnamed filter without rules.
    ///
    /// The result is identical to `Filter::default()`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns either the set name, or a hash of [`Filter::rules`]. Please
    /// note: hashed names are not used for serialization.
    ///
    /// The hash is computed over the `Debug` representation of the rules and
    /// rendered as lowercase hexadecimal.
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    pub fn name(&self) -> String {
        if let Some(name) = &self.name {
            return name.clone();
        }
        let mut h = DefaultHasher::new();
        // Feeding the whole buffer at once yields the same digest as feeding
        // it byte by byte, since the hasher consumes a plain byte stream.
        h.write(format!("{:?}", self.rules).as_bytes());
        format!("{:x}", h.finish())
    }

    /// Sets an explicit name, which takes precedence over the hashed one.
    pub fn set_name(&mut self, name: &str) {
        self.name = Some(name.to_string());
    }

    /// When filters are deserialized from json or have been assembled via code,
    /// the regular expressions contained in [`Filter::rules`] need to be
    /// compiled before any matches are to be made.
    ///
    /// Any previously compiled state is replaced, so calling this on an
    /// already compiled filter is safe and simply recompiles it.
    ///
    /// # Errors
    ///
    /// Fails if a pattern is not a valid regular expression, or with
    /// `UnsupportedValue` if a rule value is neither a single pattern nor a
    /// list of patterns.
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    pub fn compile(mut self) -> Result<Self> {
        // Build into a fresh vector instead of appending to `self.re`:
        // appending would leave `self.re` longer than `self.rules` on a
        // second call, which `is_match` reports as "not compiled".
        let mut all_compiled = Vec::with_capacity(self.rules.len());
        for rule in &self.rules {
            let mut compiled = HashMap::new();
            for (key, value) in rule.iter() {
                let mut res = Vec::new();
                match value {
                    Single(re) => res.push(Regex::new(re)?),
                    Multiple(mre) => {
                        for re in mre {
                            res.push(Regex::new(re)?);
                        }
                    }
                    _ => {
                        let e = "Not a regular expression".to_string();
                        return Err(UnsupportedValue(e));
                    }
                }
                compiled.insert(key.to_string(), res);
            }
            all_compiled.push(compiled);
        }
        self.re = all_compiled;
        Ok(self)
    }

    /// Combines [`Filter::is_match`] and [`Operations::apply`]
    ///
    /// Returns a tuple of two bools, the first representing if the filter has
    /// been applied, the second if the operation deleted the message that was
    /// supplied
    ///
    /// # Errors
    ///
    /// Propagates any error from matching or from applying the operations.
    ///
    /// [`Filter::is_match`]: struct.Filter.html#method.is_match
    /// [`Operations::apply`]: struct.Operations.html#method.apply
    pub fn apply_if_match(&self, msg: &Message, db: &Database) -> Result<(bool, bool)> {
        if !self.is_match(msg, db)? {
            return Ok((false, false));
        }
        let deleted = self.op.apply(msg, db, &self.name())?;
        Ok((true, deleted))
    }

    /// Checks if the supplied message matches any of the combinations described
    /// in [`Filter::rules`]
    ///
    /// Within a rule every key has to match (AND); the first rule that matches
    /// completely makes the whole filter match (OR). Keys starting with `@`
    /// are special selectors (`@path`, `@tags`, `@thread-tags`, `@attachment`,
    /// `@attachment-body`, `@body`) for which it is enough that any value
    /// matches any pattern; unknown `@` keys are ignored. All other keys are
    /// header names, for which every pattern has to match the header value.
    ///
    /// # Errors
    ///
    /// Returns `RegexUncompiled` if [`Filter::compile`] has not been called,
    /// and propagates notmuch, I/O and mail parsing errors.
    ///
    /// [`Filter::rules`]: struct.Filter.html#structfield.rules
    /// [`Filter::compile`]: struct.Filter.html#method.compile
    pub fn is_match(&self, msg: &Message, db: &Database) -> Result<bool> {
        // self.re will only be populated after self.compile()
        if self.re.len() != self.rules.len() {
            let e = "Filters need to be compiled before tested".to_string();
            return Err(RegexUncompiled(e));
        }
        for rule in &self.re {
            let mut is_match = true;
            for (part, res) in rule {
                // Every part is evaluated even if an earlier one already
                // failed, so that errors surface independently of the
                // (unspecified) HashMap iteration order.
                let part_matches = if part.starts_with('@') {
                    Self::special_match(part, res, msg, db)?
                } else {
                    Self::header_match(part, res, msg)?
                };
                is_match = part_matches && is_match;
            }
            if is_match {
                return Ok(true);
            }
        }
        Ok(false)
    }

    /// Tests if any of the supplied values matches any of the supplied regular
    /// expressions.
    fn sub_match<I, S>(res: &[Regex], mut values: I) -> bool
    where
        S: AsRef<str>,
        I: Iterator<Item = S>,
    {
        values.any(|value| res.iter().any(|re| re.is_match(value.as_ref())))
    }

    /// Evaluates one `@`-prefixed selector of a rule against the message.
    fn special_match(part: &str, res: &[Regex], msg: &Message, db: &Database) -> Result<bool> {
        let matched = match part {
            "@path" => {
                // XXX we might want to return an error here if we can't
                // make the path to a valid utf-8 str? Or maybe go for
                // to_str_lossy? For now such paths are skipped.
                let paths = msg
                    .filenames()
                    .filter_map(|f| f.to_str().map(|n| n.to_string()));
                Self::sub_match(res, paths)
            }
            "@tags" => Self::sub_match(res, msg.tags()),
            "@thread-tags" => Self::thread_tags_match(res, msg, db)?,
            "@attachment" | "@attachment-body" | "@body" => Self::mail_match(part, res, msg)?,
            // Unknown selectors do not constrain the rule.
            _ => true,
        };
        Ok(matched)
    }

    /// Matches the patterns against the tags of the thread the message is in.
    fn thread_tags_match(res: &[Regex], msg: &Message, db: &Database) -> Result<bool> {
        // creating a new query as we don't have information about our own
        // thread yet
        let q: Query = db.create_query(&format!("thread:{}", msg.thread_id()))?;
        let mut r: Threads = q.search_threads()?;
        let matched = match r.next() {
            Some(thread) => Self::sub_match(res, thread.tags()),
            // Without a thread there are no tags to match; fail closed like
            // a missing header does instead of letting the rule pass.
            None => false,
        };
        Ok(matched)
    }

    /// Reads and parses the message file, then matches the patterns against
    /// attachment file names, the body, or the bodies of textual subparts,
    /// depending on `part`.
    fn mail_match(part: &str, res: &[Regex], msg: &Message) -> Result<bool> {
        let mut buf = Vec::new();
        // XXX-file notmuch says it returns a random filename if
        // multiple are present. Question is if the new tag is even
        // applied to messages we've already seen, do we ever run
        // into that being a problem at all?
        let mut file = File::open(msg.filename())?;
        file.read_to_end(&mut buf)?;
        let parsed = parse_mail(&buf)?;
        let matched = match part {
            "@attachment" => {
                let names = parsed.subparts.iter().filter_map(|s| {
                    s.get_content_disposition()
                        .params
                        .get("filename")
                        .cloned()
                });
                Self::sub_match(res, names)
            }
            "@body" => Self::sub_match(res, [parsed.get_body()?].iter()),
            "@attachment-body" => {
                // XXX are we sure we only care about text mime types?
                // There others?
                let mut bodies = Vec::new();
                for sub in &parsed.subparts {
                    if sub.ctype.mimetype.starts_with("text") {
                        bodies.push(sub.get_body()?);
                    }
                }
                Self::sub_match(res, bodies.iter())
            }
            // Not reachable from `special_match`; treated as unconstrained.
            _ => true,
        };
        Ok(matched)
    }

    /// Matches a header: the header has to exist and every pattern has to
    /// match its value.
    fn header_match(part: &str, res: &[Regex], msg: &Message) -> Result<bool> {
        match msg.header(part) {
            Ok(Some(p)) => Ok(res.iter().all(|re| re.is_match(&p))),
            Ok(None) => Ok(false),
            Err(e) => Err(NotmuchError(e)),
        }
    }
}