Skip to main content

mxr_search/
query_builder.rs

1use crate::ast::*;
2use crate::schema::MxrSchema;
3use chrono::{Datelike, Local, NaiveDate};
4use std::ops::Bound;
5use tantivy::query::{
6    AllQuery, BooleanQuery, BoostQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery,
7};
8use tantivy::schema::{Field, IndexRecordOption};
9use tantivy::Term;
10
11pub struct QueryBuilder {
12    subject: Field,
13    from_name: Field,
14    from_email: Field,
15    to_email: Field,
16    cc_email: Field,
17    bcc_email: Field,
18    snippet: Field,
19    body_text: Field,
20    attachment_filenames: Field,
21    labels: Field,
22    is_read: Field,
23    is_starred: Field,
24    is_draft: Field,
25    is_sent: Field,
26    is_trash: Field,
27    is_spam: Field,
28    is_answered: Field,
29    has_attachments: Field,
30}
31
32impl QueryBuilder {
33    pub fn new(schema: &MxrSchema) -> Self {
34        Self {
35            subject: schema.subject,
36            from_name: schema.from_name,
37            from_email: schema.from_email,
38            to_email: schema.to_email,
39            cc_email: schema.cc_email,
40            bcc_email: schema.bcc_email,
41            snippet: schema.snippet,
42            body_text: schema.body_text,
43            attachment_filenames: schema.attachment_filenames,
44            labels: schema.labels,
45            is_read: schema.is_read,
46            is_starred: schema.is_starred,
47            is_draft: schema.is_draft,
48            is_sent: schema.is_sent,
49            is_trash: schema.is_trash,
50            is_spam: schema.is_spam,
51            is_answered: schema.is_answered,
52            has_attachments: schema.has_attachments,
53        }
54    }
55
56    pub fn build(&self, node: &QueryNode) -> Box<dyn Query> {
57        match node {
58            QueryNode::Text(text) => self.build_text_query(text),
59            QueryNode::Phrase(phrase) => self.build_phrase_query(phrase),
60            QueryNode::Field { field, value } => self.build_field_query(field, value),
61            QueryNode::Filter(filter) => self.build_filter_query(filter),
62            QueryNode::Label(label) => self.build_label_query(label),
63            QueryNode::DateRange { bound, date } => self.build_date_query(bound, date),
64            QueryNode::Size { op, bytes } => self.build_size_query(op, *bytes),
65            QueryNode::And(left, right) => {
66                let left_q = self.build(left);
67                let right_q = self.build(right);
68                Box::new(BooleanQuery::new(vec![
69                    (Occur::Must, left_q),
70                    (Occur::Must, right_q),
71                ]))
72            }
73            QueryNode::Or(left, right) => {
74                let left_q = self.build(left);
75                let right_q = self.build(right);
76                Box::new(BooleanQuery::new(vec![
77                    (Occur::Should, left_q),
78                    (Occur::Should, right_q),
79                ]))
80            }
81            QueryNode::Not(inner) => {
82                let inner_q = self.build(inner);
83                Box::new(BooleanQuery::new(vec![
84                    (Occur::MustNot, inner_q),
85                    // BooleanQuery with only MustNot needs an all-docs clause
86                    (Occur::Should, Box::new(AllQuery)),
87                ]))
88            }
89        }
90    }
91
92    fn build_text_query(&self, text: &str) -> Box<dyn Query> {
93        let fields_boosts: Vec<(Field, f32)> = vec![
94            (self.subject, 3.0),
95            (self.from_name, 2.0),
96            (self.from_email, 2.0),
97            (self.snippet, 1.0),
98            (self.body_text, 0.5),
99            (self.attachment_filenames, 0.75),
100        ];
101
102        let tokens = tokenize_text_value(text);
103        if tokens.is_empty() {
104            return self.build_text_token_query(&fields_boosts, &text.to_lowercase());
105        }
106        if tokens.len() == 1 {
107            return self.build_text_token_query(&fields_boosts, &tokens[0]);
108        }
109
110        let token_groups = tokens
111            .into_iter()
112            .map(|token| {
113                (
114                    Occur::Must,
115                    self.build_text_token_query(&fields_boosts, &token),
116                )
117            })
118            .collect();
119        Box::new(BooleanQuery::new(token_groups))
120    }
121
122    fn build_phrase_query(&self, phrase: &str) -> Box<dyn Query> {
123        let terms: Vec<Term> = phrase
124            .split_whitespace()
125            .map(|w| Term::from_field_text(self.subject, &w.to_lowercase()))
126            .collect();
127
128        if terms.len() == 1 {
129            let tq = TermQuery::new(
130                terms.into_iter().next().unwrap(),
131                IndexRecordOption::WithFreqs,
132            );
133            return Box::new(BoostQuery::new(Box::new(tq), 3.0));
134        }
135
136        let phrase_q = PhraseQuery::new(terms);
137        Box::new(BoostQuery::new(Box::new(phrase_q), 3.0))
138    }
139
140    fn build_field_query(&self, field: &QueryField, value: &str) -> Box<dyn Query> {
141        let tantivy_field = match field {
142            QueryField::From => self.from_email,
143            QueryField::To => self.to_email,
144            QueryField::Cc => self.cc_email,
145            QueryField::Bcc => self.bcc_email,
146            QueryField::Subject => self.subject,
147            QueryField::Body => self.body_text,
148            QueryField::Filename => self.attachment_filenames,
149        };
150
151        match field {
152            QueryField::Subject | QueryField::Body | QueryField::Filename => {
153                self.build_text_field_query(tantivy_field, value)
154            }
155            QueryField::From | QueryField::To | QueryField::Cc | QueryField::Bcc => {
156                let term = Term::from_field_text(tantivy_field, value);
157                Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs))
158            }
159        }
160    }
161
162    fn build_filter_query(&self, filter: &FilterKind) -> Box<dyn Query> {
163        match filter {
164            FilterKind::Read => {
165                let term = Term::from_field_bool(self.is_read, true);
166                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
167            }
168            FilterKind::Unread => {
169                let term = Term::from_field_bool(self.is_read, false);
170                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
171            }
172            FilterKind::Starred => {
173                let term = Term::from_field_bool(self.is_starred, true);
174                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
175            }
176            FilterKind::Draft => {
177                let term = Term::from_field_bool(self.is_draft, true);
178                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
179            }
180            FilterKind::Sent => {
181                let term = Term::from_field_bool(self.is_sent, true);
182                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
183            }
184            FilterKind::Trash => {
185                let term = Term::from_field_bool(self.is_trash, true);
186                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
187            }
188            FilterKind::Spam => {
189                let term = Term::from_field_bool(self.is_spam, true);
190                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
191            }
192            FilterKind::Answered => {
193                let term = Term::from_field_bool(self.is_answered, true);
194                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
195            }
196            FilterKind::Inbox => self.build_label_query("INBOX"),
197            FilterKind::Archived => Box::new(BooleanQuery::new(vec![
198                (Occur::Should, self.build_label_query("ARCHIVE")),
199                (
200                    Occur::Should,
201                    Box::new(BooleanQuery::new(vec![
202                        (Occur::MustNot, self.build_label_query("INBOX")),
203                        (Occur::MustNot, self.build_filter_query(&FilterKind::Sent)),
204                        (Occur::MustNot, self.build_filter_query(&FilterKind::Draft)),
205                        (Occur::MustNot, self.build_filter_query(&FilterKind::Trash)),
206                        (Occur::MustNot, self.build_filter_query(&FilterKind::Spam)),
207                        (Occur::Should, Box::new(AllQuery)),
208                    ])),
209                ),
210            ])),
211            FilterKind::HasAttachment => {
212                let term = Term::from_field_bool(self.has_attachments, true);
213                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
214            }
215        }
216    }
217
218    fn build_label_query(&self, label: &str) -> Box<dyn Query> {
219        let term = Term::from_field_text(self.labels, &label.to_lowercase());
220        Box::new(TermQuery::new(term, IndexRecordOption::Basic))
221    }
222
223    fn build_date_query(&self, bound: &DateBound, date_val: &DateValue) -> Box<dyn Query> {
224        let resolved = resolve_date(date_val);
225        let field_name = "date".to_string();
226        let start = self.date_to_tantivy(resolved);
227
228        match bound {
229            DateBound::After => Box::new(RangeQuery::new_date_bounds(
230                field_name,
231                Bound::Included(start),
232                Bound::Unbounded,
233            )),
234            DateBound::Before => Box::new(RangeQuery::new_date_bounds(
235                field_name,
236                Bound::Unbounded,
237                Bound::Excluded(start),
238            )),
239            DateBound::Exact => {
240                let end_date = resolved.succ_opt().unwrap_or(resolved);
241                let end = self.date_to_tantivy(end_date);
242                Box::new(RangeQuery::new_date_bounds(
243                    field_name,
244                    Bound::Included(start),
245                    Bound::Excluded(end),
246                ))
247            }
248        }
249    }
250
251    fn build_size_query(&self, op: &SizeOp, bytes: u64) -> Box<dyn Query> {
252        let field_name = "size_bytes".to_string();
253        match op {
254            SizeOp::LessThan => Box::new(RangeQuery::new_u64_bounds(
255                field_name,
256                Bound::Unbounded,
257                Bound::Excluded(bytes),
258            )),
259            SizeOp::LessThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
260                field_name,
261                Bound::Unbounded,
262                Bound::Included(bytes),
263            )),
264            SizeOp::Equal => Box::new(RangeQuery::new_u64_bounds(
265                field_name,
266                Bound::Included(bytes),
267                Bound::Included(bytes),
268            )),
269            SizeOp::GreaterThan => Box::new(RangeQuery::new_u64_bounds(
270                field_name,
271                Bound::Excluded(bytes),
272                Bound::Unbounded,
273            )),
274            SizeOp::GreaterThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
275                field_name,
276                Bound::Included(bytes),
277                Bound::Unbounded,
278            )),
279        }
280    }
281
282    fn build_text_field_query(&self, field: Field, value: &str) -> Box<dyn Query> {
283        let terms: Vec<Term> = tokenize_text_value(value)
284            .into_iter()
285            .map(|word| Term::from_field_text(field, &word))
286            .collect();
287
288        if terms.len() <= 1 {
289            let term = terms
290                .into_iter()
291                .next()
292                .unwrap_or_else(|| Term::from_field_text(field, &value.to_lowercase()));
293            return Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
294        }
295
296        Box::new(PhraseQuery::new(terms))
297    }
298
299    fn build_text_token_query(
300        &self,
301        fields_boosts: &[(Field, f32)],
302        token: &str,
303    ) -> Box<dyn Query> {
304        let subqueries = fields_boosts
305            .iter()
306            .map(|(field, boost)| {
307                let term = Term::from_field_text(*field, token);
308                let tq = TermQuery::new(term, IndexRecordOption::WithFreqs);
309                let boosted: Box<dyn Query> = Box::new(BoostQuery::new(Box::new(tq), *boost));
310                (Occur::Should, boosted)
311            })
312            .collect();
313        Box::new(BooleanQuery::new(subqueries))
314    }
315
316    fn date_to_tantivy(&self, date: NaiveDate) -> tantivy::DateTime {
317        let dt = date.and_hms_opt(0, 0, 0).unwrap();
318        let ts = dt.and_utc().timestamp();
319        tantivy::DateTime::from_timestamp_secs(ts)
320    }
321}
322
323fn resolve_date(date_val: &DateValue) -> NaiveDate {
324    let today = Local::now().date_naive();
325    match date_val {
326        DateValue::Specific(d) => *d,
327        DateValue::Today => today,
328        DateValue::Yesterday => today.pred_opt().unwrap_or(today),
329        DateValue::ThisWeek => {
330            let weekday = today.weekday().num_days_from_monday();
331            today - chrono::Duration::days(weekday as i64)
332        }
333        DateValue::ThisMonth => {
334            NaiveDate::from_ymd_opt(today.year(), today.month(), 1).unwrap_or(today)
335        }
336    }
337}
338
/// Splits `value` into lower-cased runs of alphanumeric characters.
///
/// Every non-alphanumeric character acts as a separator, and empty runs are
/// dropped, so `"mxr@0.4.6"` yields `["mxr", "0", "4", "6"]`.
fn tokenize_text_value(value: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    // Byte offset where the current alphanumeric run began, if inside one.
    let mut run_start: Option<usize> = None;

    for (offset, ch) in value.char_indices() {
        match (ch.is_alphanumeric(), run_start) {
            (true, None) => run_start = Some(offset),
            (false, Some(start)) => {
                // Lower-case the whole run at once rather than per character.
                tokens.push(value[start..offset].to_lowercase());
                run_start = None;
            }
            _ => {}
        }
    }

    // Flush a run that extends to the end of the input.
    if let Some(start) = run_start {
        tokens.push(value[start..].to_lowercase());
    }

    tokens
}
346
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::SearchIndex;
    use crate::parser::parse_query;
    use mxr_core::id::*;
    use mxr_core::types::*;

    /// Builds an envelope with fresh ids, one fixed recipient, and the
    /// current time as its date; only the arguments vary between fixtures.
    fn make_test_envelope(
        subject: &str,
        from_email: &str,
        from_name: &str,
        flags: MessageFlags,
        has_attachments: bool,
    ) -> Envelope {
        let sender = Address {
            name: Some(from_name.to_string()),
            email: from_email.to_string(),
        };
        let recipient = Address {
            name: None,
            email: "recipient@example.com".to_string(),
        };

        Envelope {
            id: MessageId::new(),
            account_id: AccountId::new(),
            provider_id: format!("fake-{}", subject.len()),
            thread_id: ThreadId::new(),
            message_id_header: None,
            in_reply_to: None,
            references: vec![],
            from: sender,
            to: vec![recipient],
            cc: vec![],
            bcc: vec![],
            subject: subject.to_string(),
            date: chrono::Utc::now(),
            flags,
            snippet: format!("Snippet for {}", subject),
            has_attachments,
            size_bytes: 1000,
            unsubscribe: UnsubscribeMethod::None,
            label_provider_ids: vec![],
        }
    }

    /// Indexes four fixture messages into an in-memory index and returns the
    /// index together with the envelopes, in insertion order.
    fn build_test_index() -> (SearchIndex, Vec<Envelope>) {
        // (subject, sender email, sender name, flags, has attachments)
        let fixtures = vec![
            (
                "Deployment plan for v2",
                "alice@example.com",
                "Alice",
                MessageFlags::empty(), // unread
                false,
            ),
            (
                "Invoice #2847",
                "bob@example.com",
                "Bob",
                MessageFlags::READ | MessageFlags::STARRED,
                true,
            ),
            (
                "Team standup notes",
                "carol@example.com",
                "Carol",
                MessageFlags::READ,
                false,
            ),
            (
                "crates.io: Successfully published mxr@0.4.6",
                "noreply@crates.io",
                "crates.io",
                MessageFlags::READ,
                false,
            ),
        ];

        let envelopes: Vec<Envelope> = fixtures
            .into_iter()
            .map(|(subject, email, name, flags, attached)| {
                make_test_envelope(subject, email, name, flags, attached)
            })
            .collect();

        let mut idx = SearchIndex::in_memory().unwrap();
        for envelope in &envelopes {
            idx.index_envelope(envelope).unwrap();
        }
        idx.commit().unwrap();

        (idx, envelopes)
    }

    /// Query builder wired to a freshly built schema.
    fn test_builder() -> QueryBuilder {
        let schema = MxrSchema::build();
        QueryBuilder::new(&schema)
    }

    #[test]
    fn build_text_query_with_boosts() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let query = qb.build(&QueryNode::Text("deployment".to_string()));
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[0].id.as_str());
    }

    #[test]
    fn build_field_query() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let from_alice = QueryNode::Field {
            field: QueryField::From,
            value: "alice@example.com".to_string(),
        };
        let query = qb.build(&from_alice);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[0].id.as_str());
    }

    #[test]
    fn build_filter_query() {
        let (idx, _) = build_test_index();
        let qb = test_builder();

        // Only the first fixture has empty flags, i.e. is unread.
        let query = qb.build(&QueryNode::Filter(FilterKind::Unread));
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
    }

    #[test]
    fn build_date_range_query() {
        let (idx, _) = build_test_index();
        let qb = test_builder();

        // Every fixture is dated "now", so searching from yesterday onwards
        // must return all four.
        let yesterday = Local::now().date_naive().pred_opt().unwrap();
        let since_yesterday = QueryNode::DateRange {
            bound: DateBound::After,
            date: DateValue::Specific(yesterday),
        };
        let query = qb.build(&since_yesterday);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 4);
    }

    #[test]
    fn build_compound_query() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        // from:bob AND is:starred
        let from_bob = QueryNode::Field {
            field: QueryField::From,
            value: "bob@example.com".to_string(),
        };
        let starred = QueryNode::Filter(FilterKind::Starred);
        let both = QueryNode::And(Box::new(from_bob), Box::new(starred));

        let query = qb.build(&both);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[1].id.as_str());
    }

    #[test]
    fn search_with_parsed_query() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let ast = parse_query("from:alice@example.com").unwrap();
        let found = idx
            .search_ast(qb.build(&ast), 10, 0, SortOrder::Relevance)
            .unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[0].id.as_str());
    }

    #[test]
    fn build_text_query_tokenizes_punctuation_heavy_terms() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        // Dotted host name.
        let crates_ast = parse_query("crates.io").unwrap();
        let crates_found = idx
            .search_ast(qb.build(&crates_ast), 10, 0, SortOrder::Relevance)
            .unwrap();
        assert_eq!(crates_found.results.len(), 1);
        assert_eq!(
            crates_found.results[0].message_id,
            envelopes[3].id.as_str()
        );

        // Package name with an `@version` suffix.
        let version_ast = parse_query("mxr@0.4.6").unwrap();
        let version_found = idx
            .search_ast(qb.build(&version_ast), 10, 0, SortOrder::Relevance)
            .unwrap();
        assert_eq!(version_found.results.len(), 1);
        assert_eq!(
            version_found.results[0].message_id,
            envelopes[3].id.as_str()
        );
    }
}
551}