// mxr_search/query_builder.rs

1use crate::ast::*;
2use crate::schema::MxrSchema;
3use chrono::{Datelike, Local, NaiveDate};
4use std::ops::Bound;
5use tantivy::query::{AllQuery, BooleanQuery, BoostQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery};
6use tantivy::schema::{Field, IndexRecordOption};
7use tantivy::Term;
8
/// Translates parsed query AST nodes into tantivy queries.
///
/// Holds the tantivy `Field` handles that the builder needs; they are copied
/// out of an `MxrSchema` by `QueryBuilder::new`.
pub struct QueryBuilder {
    // Text fields searched by free-text, phrase, and `field:value` queries.
    subject: Field,
    from_name: Field,
    from_email: Field,
    to_email: Field,
    cc_email: Field,
    bcc_email: Field,
    snippet: Field,
    body_text: Field,
    attachment_filenames: Field,
    // Label names; queried with lowercased terms.
    labels: Field,
    // Boolean flag fields used by filter queries.
    is_read: Field,
    is_starred: Field,
    is_draft: Field,
    is_sent: Field,
    is_trash: Field,
    is_spam: Field,
    is_answered: Field,
    has_attachments: Field,
}
29
30impl QueryBuilder {
31    pub fn new(schema: &MxrSchema) -> Self {
32        Self {
33            subject: schema.subject,
34            from_name: schema.from_name,
35            from_email: schema.from_email,
36            to_email: schema.to_email,
37            cc_email: schema.cc_email,
38            bcc_email: schema.bcc_email,
39            snippet: schema.snippet,
40            body_text: schema.body_text,
41            attachment_filenames: schema.attachment_filenames,
42            labels: schema.labels,
43            is_read: schema.is_read,
44            is_starred: schema.is_starred,
45            is_draft: schema.is_draft,
46            is_sent: schema.is_sent,
47            is_trash: schema.is_trash,
48            is_spam: schema.is_spam,
49            is_answered: schema.is_answered,
50            has_attachments: schema.has_attachments,
51        }
52    }
53
54    pub fn build(&self, node: &QueryNode) -> Box<dyn Query> {
55        match node {
56            QueryNode::Text(text) => self.build_text_query(text),
57            QueryNode::Phrase(phrase) => self.build_phrase_query(phrase),
58            QueryNode::Field { field, value } => self.build_field_query(field, value),
59            QueryNode::Filter(filter) => self.build_filter_query(filter),
60            QueryNode::Label(label) => self.build_label_query(label),
61            QueryNode::DateRange { bound, date } => self.build_date_query(bound, date),
62            QueryNode::Size { op, bytes } => self.build_size_query(op, *bytes),
63            QueryNode::And(left, right) => {
64                let left_q = self.build(left);
65                let right_q = self.build(right);
66                Box::new(BooleanQuery::new(vec![
67                    (Occur::Must, left_q),
68                    (Occur::Must, right_q),
69                ]))
70            }
71            QueryNode::Or(left, right) => {
72                let left_q = self.build(left);
73                let right_q = self.build(right);
74                Box::new(BooleanQuery::new(vec![
75                    (Occur::Should, left_q),
76                    (Occur::Should, right_q),
77                ]))
78            }
79            QueryNode::Not(inner) => {
80                let inner_q = self.build(inner);
81                Box::new(BooleanQuery::new(vec![
82                    (Occur::MustNot, inner_q),
83                    // BooleanQuery with only MustNot needs an all-docs clause
84                    (Occur::Should, Box::new(AllQuery)),
85                ]))
86            }
87        }
88    }
89
90    fn build_text_query(&self, text: &str) -> Box<dyn Query> {
91        let fields_boosts: Vec<(Field, f32)> = vec![
92            (self.subject, 3.0),
93            (self.from_name, 2.0),
94            (self.from_email, 2.0),
95            (self.snippet, 1.0),
96            (self.body_text, 0.5),
97        ];
98
99        let text_lower = text.to_lowercase();
100        let subqueries: Vec<(Occur, Box<dyn Query>)> = fields_boosts
101            .into_iter()
102            .map(|(field, boost)| {
103                let term = Term::from_field_text(field, &text_lower);
104                let tq = TermQuery::new(term, IndexRecordOption::WithFreqs);
105                let boosted: Box<dyn Query> = Box::new(BoostQuery::new(Box::new(tq), boost));
106                (Occur::Should, boosted)
107            })
108            .collect();
109
110        Box::new(BooleanQuery::new(subqueries))
111    }
112
113    fn build_phrase_query(&self, phrase: &str) -> Box<dyn Query> {
114        let terms: Vec<Term> = phrase
115            .split_whitespace()
116            .map(|w| Term::from_field_text(self.subject, &w.to_lowercase()))
117            .collect();
118
119        if terms.len() == 1 {
120            let tq = TermQuery::new(
121                terms.into_iter().next().unwrap(),
122                IndexRecordOption::WithFreqs,
123            );
124            return Box::new(BoostQuery::new(Box::new(tq), 3.0));
125        }
126
127        let phrase_q = PhraseQuery::new(terms);
128        Box::new(BoostQuery::new(Box::new(phrase_q), 3.0))
129    }
130
131    fn build_field_query(&self, field: &QueryField, value: &str) -> Box<dyn Query> {
132        let tantivy_field = match field {
133            QueryField::From => self.from_email,
134            QueryField::To => self.to_email,
135            QueryField::Cc => self.cc_email,
136            QueryField::Bcc => self.bcc_email,
137            QueryField::Subject => self.subject,
138            QueryField::Body => self.body_text,
139            QueryField::Filename => self.attachment_filenames,
140        };
141
142        match field {
143            QueryField::Subject | QueryField::Body | QueryField::Filename => {
144                self.build_text_field_query(tantivy_field, value)
145            }
146            QueryField::From | QueryField::To | QueryField::Cc | QueryField::Bcc => {
147                let term = Term::from_field_text(tantivy_field, value);
148                Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs))
149            }
150        }
151    }
152
153    fn build_filter_query(&self, filter: &FilterKind) -> Box<dyn Query> {
154        match filter {
155            FilterKind::Read => {
156                let term = Term::from_field_bool(self.is_read, true);
157                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
158            }
159            FilterKind::Unread => {
160                let term = Term::from_field_bool(self.is_read, false);
161                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
162            }
163            FilterKind::Starred => {
164                let term = Term::from_field_bool(self.is_starred, true);
165                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
166            }
167            FilterKind::Draft => {
168                let term = Term::from_field_bool(self.is_draft, true);
169                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
170            }
171            FilterKind::Sent => {
172                let term = Term::from_field_bool(self.is_sent, true);
173                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
174            }
175            FilterKind::Trash => {
176                let term = Term::from_field_bool(self.is_trash, true);
177                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
178            }
179            FilterKind::Spam => {
180                let term = Term::from_field_bool(self.is_spam, true);
181                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
182            }
183            FilterKind::Answered => {
184                let term = Term::from_field_bool(self.is_answered, true);
185                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
186            }
187            FilterKind::Inbox => self.build_label_query("INBOX"),
188            FilterKind::Archived => Box::new(BooleanQuery::new(vec![
189                (Occur::Should, self.build_label_query("ARCHIVE")),
190                (Occur::Should, Box::new(BooleanQuery::new(vec![
191                    (Occur::MustNot, self.build_label_query("INBOX")),
192                    (Occur::MustNot, self.build_filter_query(&FilterKind::Sent)),
193                    (Occur::MustNot, self.build_filter_query(&FilterKind::Draft)),
194                    (Occur::MustNot, self.build_filter_query(&FilterKind::Trash)),
195                    (Occur::MustNot, self.build_filter_query(&FilterKind::Spam)),
196                    (Occur::Should, Box::new(AllQuery)),
197                ]))),
198            ])),
199            FilterKind::HasAttachment => {
200                let term = Term::from_field_bool(self.has_attachments, true);
201                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
202            }
203        }
204    }
205
206    fn build_label_query(&self, label: &str) -> Box<dyn Query> {
207        let term = Term::from_field_text(self.labels, &label.to_lowercase());
208        Box::new(TermQuery::new(term, IndexRecordOption::Basic))
209    }
210
211    fn build_date_query(&self, bound: &DateBound, date_val: &DateValue) -> Box<dyn Query> {
212        let resolved = resolve_date(date_val);
213        let field_name = "date".to_string();
214        let start = self.date_to_tantivy(resolved);
215
216        match bound {
217            DateBound::After => Box::new(RangeQuery::new_date_bounds(
218                field_name,
219                Bound::Included(start),
220                Bound::Unbounded,
221            )),
222            DateBound::Before => Box::new(RangeQuery::new_date_bounds(
223                field_name,
224                Bound::Unbounded,
225                Bound::Excluded(start),
226            )),
227            DateBound::Exact => {
228                let end_date = resolved.succ_opt().unwrap_or(resolved);
229                let end = self.date_to_tantivy(end_date);
230                Box::new(RangeQuery::new_date_bounds(
231                    field_name,
232                    Bound::Included(start),
233                    Bound::Excluded(end),
234                ))
235            }
236        }
237    }
238
239    fn build_size_query(&self, op: &SizeOp, bytes: u64) -> Box<dyn Query> {
240        let field_name = "size_bytes".to_string();
241        match op {
242            SizeOp::LessThan => Box::new(RangeQuery::new_u64_bounds(
243                field_name,
244                Bound::Unbounded,
245                Bound::Excluded(bytes),
246            )),
247            SizeOp::LessThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
248                field_name,
249                Bound::Unbounded,
250                Bound::Included(bytes),
251            )),
252            SizeOp::Equal => Box::new(RangeQuery::new_u64_bounds(
253                field_name,
254                Bound::Included(bytes),
255                Bound::Included(bytes),
256            )),
257            SizeOp::GreaterThan => Box::new(RangeQuery::new_u64_bounds(
258                field_name,
259                Bound::Excluded(bytes),
260                Bound::Unbounded,
261            )),
262            SizeOp::GreaterThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
263                field_name,
264                Bound::Included(bytes),
265                Bound::Unbounded,
266            )),
267        }
268    }
269
270    fn build_text_field_query(&self, field: Field, value: &str) -> Box<dyn Query> {
271        let terms: Vec<Term> = tokenize_text_value(value)
272            .into_iter()
273            .map(|word| Term::from_field_text(field, &word))
274            .collect();
275
276        if terms.len() <= 1 {
277            let term = terms
278                .into_iter()
279                .next()
280                .unwrap_or_else(|| Term::from_field_text(field, &value.to_lowercase()));
281            return Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
282        }
283
284        Box::new(PhraseQuery::new(terms))
285    }
286
287    fn date_to_tantivy(&self, date: NaiveDate) -> tantivy::DateTime {
288        let dt = date.and_hms_opt(0, 0, 0).unwrap();
289        let ts = dt.and_utc().timestamp();
290        tantivy::DateTime::from_timestamp_secs(ts)
291    }
292}
293
294fn resolve_date(date_val: &DateValue) -> NaiveDate {
295    let today = Local::now().date_naive();
296    match date_val {
297        DateValue::Specific(d) => *d,
298        DateValue::Today => today,
299        DateValue::Yesterday => today.pred_opt().unwrap_or(today),
300        DateValue::ThisWeek => {
301            let weekday = today.weekday().num_days_from_monday();
302            today - chrono::Duration::days(weekday as i64)
303        }
304        DateValue::ThisMonth => {
305            NaiveDate::from_ymd_opt(today.year(), today.month(), 1).unwrap_or(today)
306        }
307    }
308}
309
/// Splits `value` into lowercased tokens.
///
/// Every non-alphanumeric character acts as a separator; empty pieces
/// (from leading, trailing, or consecutive separators) are dropped.
fn tokenize_text_value(value: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    for piece in value.split(|c: char| !c.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        tokens.push(piece.to_lowercase());
    }
    tokens
}
317
#[cfg(test)]
mod tests {
    //! Tests that run built queries against an in-memory index holding three
    //! fixture envelopes.

    use super::*;
    use crate::index::SearchIndex;
    use crate::parser::parse_query;
    use mxr_core::id::*;
    use mxr_core::types::*;

    /// Creates an envelope with the given subject, sender, flags and
    /// attachment marker; all other fields get fixed or freshly generated
    /// values (date is the current UTC time, size is 1000 bytes).
    fn make_test_envelope(
        subject: &str,
        from_email: &str,
        from_name: &str,
        flags: MessageFlags,
        has_attachments: bool,
    ) -> Envelope {
        Envelope {
            id: MessageId::new(),
            account_id: AccountId::new(),
            // Derived from the subject length, so fixtures need subjects of
            // different lengths to get distinct provider ids.
            provider_id: format!("fake-{}", subject.len()),
            thread_id: ThreadId::new(),
            message_id_header: None,
            in_reply_to: None,
            references: vec![],
            from: Address {
                name: Some(from_name.to_string()),
                email: from_email.to_string(),
            },
            to: vec![Address {
                name: None,
                email: "recipient@example.com".to_string(),
            }],
            cc: vec![],
            bcc: vec![],
            subject: subject.to_string(),
            date: chrono::Utc::now(),
            flags,
            snippet: format!("Snippet for {}", subject),
            has_attachments,
            size_bytes: 1000,
            unsubscribe: UnsubscribeMethod::None,
            label_provider_ids: vec![],
        }
    }

    /// Builds an in-memory index with three committed envelopes and returns
    /// it together with the envelopes, in insertion order.
    fn build_test_index() -> (SearchIndex, Vec<Envelope>) {
        let mut idx = SearchIndex::in_memory().unwrap();
        let envelopes = vec![
            make_test_envelope(
                "Deployment plan for v2",
                "alice@example.com",
                "Alice",
                MessageFlags::empty(), // unread
                false,
            ),
            make_test_envelope(
                "Invoice #2847",
                "bob@example.com",
                "Bob",
                MessageFlags::READ | MessageFlags::STARRED,
                true,
            ),
            make_test_envelope(
                "Team standup notes",
                "carol@example.com",
                "Carol",
                MessageFlags::READ,
                false,
            ),
        ];
        for env in &envelopes {
            idx.index_envelope(env).unwrap();
        }
        idx.commit().unwrap();
        (idx, envelopes)
    }

    /// A free-text term that occurs only in the first subject matches only
    /// that envelope.
    #[test]
    fn build_text_query_with_boosts() {
        let (idx, envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);

        let node = QueryNode::Text("deployment".to_string());
        let query = qb.build(&node);
        let results = idx.search_ast(query, 10).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[0].id.as_str());
    }

    /// A `from:` field query with a full address matches the sender's message.
    #[test]
    fn build_field_query() {
        let (idx, envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);

        let node = QueryNode::Field {
            field: QueryField::From,
            value: "alice@example.com".to_string(),
        };
        let query = qb.build(&node);
        let results = idx.search_ast(query, 10).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[0].id.as_str());
    }

    /// The unread filter returns only messages without the READ flag.
    #[test]
    fn build_filter_query() {
        let (idx, _envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);

        // Search for unread messages
        let node = QueryNode::Filter(FilterKind::Unread);
        let query = qb.build(&node);
        let results = idx.search_ast(query, 10).unwrap();
        // Only the first envelope is unread (flags empty)
        assert_eq!(results.len(), 1);
    }

    /// An `After` bound set to yesterday includes every fixture envelope.
    #[test]
    fn build_date_range_query() {
        let (idx, _envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);

        // All test envelopes are dated today, so after yesterday should return all
        let yesterday = Local::now().date_naive().pred_opt().unwrap();
        let node = QueryNode::DateRange {
            bound: DateBound::After,
            date: DateValue::Specific(yesterday),
        };
        let query = qb.build(&node);
        let results = idx.search_ast(query, 10).unwrap();
        assert_eq!(results.len(), 3);
    }

    /// `And` requires both operands to match: sender bob and starred.
    #[test]
    fn build_compound_query() {
        let (idx, envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);

        // from:bob AND is:starred
        let node = QueryNode::And(
            Box::new(QueryNode::Field {
                field: QueryField::From,
                value: "bob@example.com".to_string(),
            }),
            Box::new(QueryNode::Filter(FilterKind::Starred)),
        );
        let query = qb.build(&node);
        let results = idx.search_ast(query, 10).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[1].id.as_str());
    }

    /// End-to-end: parse a query string, build it, and search the index.
    #[test]
    fn search_with_parsed_query() {
        let (idx, envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);

        let ast = parse_query("from:alice@example.com").unwrap();
        let query = qb.build(&ast);
        let results = idx.search_ast(query, 10).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[0].id.as_str());
    }
}