1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*!
This crate provides both a library as well as a standalone binary that can be
used as an "[initial tagging]" system for the [notmuch] email system. As a
standalone tool it integrates with the notmuch hooks and configuration files,
while the library may be integrated into a bigger e-mail client that makes use
of [notmuch-rs].

# What?

notcoal reads JSON files with [regex] patterns, checks an incoming message's
respective header for a match. If an appropriate match is found, it is then able
to add or remove tags, run an arbitrary binary for further processing, or delete
the notmuch database entry and the corresponding file.

Rules can be combined with AND as well as OR.

# Example: a filter in a JSON file

```json,ignore
[{
    "name": "money",
    "desc": "Money stuff",
    "rules": [
        {"from": "@(real\\.bank|gig-economy\\.career)",
         "subject": ["report", "month" ]},
        {"from": "no-reply@trusted\\.bank",
         "subject": "statement"}
    ],
    "op": {
        "add": "€£$",
        "rm": ["inbox", "unread"],
        "run": ["any-binary-in-our-path-or-absolute-path", "--argument"]
    }
}]
```

The rules in this filter definition are equivalent to:

```text,ignore
( from: ("@real.bank" OR "@gig-economy.career") AND
  subject: ("report" AND "month") )
OR
( from: "no-reply@trusted.bank" AND
  subject: "statement" )
```

If this filter is applied, the operations will

* add the tag `€£$`
* remove the tags `inbox` and `unread`
* run the equivalent of
  `/bin/sh -c 'any-binary-in-our-path-or-absolute-path --argument'`
  with 3 additional environment variables:

```sh,ignore
NOTCOAL_FILTER_NAME=money
NOTCOAL_FILE_NAME=/path/to/maildir/new/filename
NOTCOAL_MSG_ID=e81cadebe7dab1cc6fac7e6a41@some-isp
```

# What notcoal can match

Arbitrary headers! Matching `from` and `subject` are in no way a special case
since all headers are treated equal (and case-insensitive). The mere existence
of a header may be occasionally enough for classification, and while the
[`Value`] enum also has a boolean field, it can not be used in rules.

In addition to arbitrary headers, notcoal also supports "special field checks":

* `@tags`: tags that have already been set by a filter that matched earlier
* `@path`: the file system path of the message being processed
* `@attachment`: any attachment file names
* `@body`: the message body. The first (usually plain text) body part only.
* `@attachment-body`: the contents of any attachment, as long as its MIME type
  starts with `text`
* `@thread-tags`: match on any tag in the thread that we belong to (e.g.
  *mute*).<br>
  **Please note, this applies to the *entire* thread**, not only to the local
  branch.

[regex]: https://docs.rs/regex/
[notmuch]: https://notmuchmail.org/
[initial tagging]: https://notmuchmail.org/initial_tagging/
[notmuch-rs]: https://github.com/vhdirk/notmuch-rs/
[`Value`]: enum.Value.html
*/

use serde::{Deserialize, Serialize};

use std::fs::File;
use std::io::Read;
use std::path::Path;

use notmuch::Database;

pub mod error;
use crate::error::Error::*;
use crate::error::Result;
mod filter;
pub use crate::filter::*;
mod operations;
pub use crate::operations::*;

/// Possible values for operations and rules
///
/// To make the JSON files more legible in case they are hand-crafted, provide
/// different options for the same fields.
///
/// The enum is marked `#[serde(untagged)]`, so no variant name appears in the
/// JSON: the shape of the JSON value (string, array of strings, or boolean)
/// selects the variant.
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(untagged)]
pub enum Value {
    /// A single string
    Single(String),
    /// A list of strings
    Multiple(Vec<String>),
    /// A boolean; according to the crate-level documentation this variant can
    /// not be used in rules
    Bool(bool),
}

/// Determines behaviour for filter execution
///
/// The derived [`Default`] leaves both flags unset: the query tag is removed
/// from every message that still exists after filtering, and maildir flags are
/// not synchronised.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FilterOptions {
    /// To leave "query tag" in place instead of removing it once all filters ran
    pub leave_tag: bool,
    /// Force maildir flag syncing
    pub sync_tags: bool,
}

/// Very basic sanitisation for our (user supplied) query tag
///
/// On success returns the query string `tag:<tag>`.
///
/// # Errors
///
/// Returns `UnsupportedQuery` if `tag` is empty, or if it contains any
/// whitespace character or a single or double quote.
fn validate_query_tag(tag: &str) -> Result<String> {
    if tag.is_empty() {
        let e = "Tag to query can't be empty".to_string();
        return Err(UnsupportedQuery(e));
    }
    // Reject every kind of whitespace (tabs, newlines, ...) and not just the
    // plain space character, so the check agrees with the error message.
    let has_forbidden_char = tag
        .chars()
        .any(|c| c.is_whitespace() || c == '"' || c == '\'');
    if has_forbidden_char {
        let e = "Query tags can't contain whitespace or quotes".to_string();
        return Err(UnsupportedQuery(e));
    }
    Ok(format!("tag:{}", tag))
}

/// Run every supplied filter against each message carrying `query_tag`
///
/// Filters are tried in order for each message; processing of a message stops
/// as soon as a filter reports that the message was deleted. For messages that
/// still exist afterwards, `query_tag` is removed unless `options.leave_tag`
/// is set, and tags are synced to maildir flags if `options.sync_tags` is set.
///
/// Either fails or returns how many filters were applied (counted once per
/// message and filter pair).
pub fn filter(
    db: &Database,
    query_tag: &str,
    options: &FilterOptions,
    filters: &[Filter],
) -> Result<usize> {
    let query_string = validate_query_tag(query_tag)?;
    let query = db.create_query(&query_string)?;
    let mut applied_total = 0;

    for message in query.search_messages()? {
        let mut was_deleted = false;
        for candidate in filters {
            let (applied, deleted) = candidate.apply_if_match(&message, db)?;
            if applied {
                applied_total += 1;
            }
            if deleted {
                // Nothing left to filter or tag once the message is gone
                was_deleted = true;
                break;
            }
        }
        if was_deleted {
            continue;
        }

        if !options.leave_tag {
            message.remove_tag(query_tag)?;
        }
        if options.sync_tags {
            message.tags_to_maildir_flags()?;
        }
    }

    Ok(applied_total)
}

/// Dry run: report which filters would match without running any operations
///
/// Returns the total number of matches together with one entry per match,
/// formatted as `<message id>: <filter name>`. Fails on the first error
/// raised while building the query or checking a filter.
pub fn filter_dry(
    db: &Database,
    query_tag: &str,
    filters: &[Filter],
) -> Result<(usize, Vec<String>)> {
    let query_string = validate_query_tag(query_tag)?;
    let query = db.create_query(&query_string)?;
    let mut total = 0;
    let mut report = Vec::<String>::new();

    for message in query.search_messages()? {
        for candidate in filters {
            if candidate.is_match(&message, db)? {
                total += 1;
                report.push(format!("{}: {}", message.id(), candidate.name()));
            }
        }
    }

    Ok((total, report))
}

/// Deserialize filters from a byte slice containing a JSON array
///
/// Each deserialized filter is passed through `compile()`; the first
/// deserialization or compilation error aborts and is returned.
pub fn filters_from(buf: &[u8]) -> Result<Vec<Filter>> {
    let parsed: Vec<Filter> = serde_json::from_slice(buf)?;
    let mut compiled = Vec::new();
    for raw in parsed {
        compiled.push(raw.compile()?);
    }
    Ok(compiled)
}

/// Deserialize filters from a file
///
/// Reads the whole file at `filename` into memory and hands the bytes to
/// [`filters_from`]. I/O errors from opening or reading the file are
/// propagated.
pub fn filters_from_file<P: AsRef<Path>>(filename: &P) -> Result<Vec<Filter>> {
    let mut contents = Vec::new();
    File::open(filename)?.read_to_end(&mut contents)?;
    filters_from(&contents)
}