1use crate::ast::*;
2use crate::schema::MxrSchema;
3use chrono::{Datelike, Local, NaiveDate};
4use std::ops::Bound;
5use tantivy::query::{
6 AllQuery, BooleanQuery, BoostQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery,
7};
8use tantivy::schema::{Field, IndexRecordOption};
9use tantivy::Term;
10
11pub struct QueryBuilder {
12 subject: Field,
13 from_name: Field,
14 from_email: Field,
15 to_email: Field,
16 cc_email: Field,
17 bcc_email: Field,
18 snippet: Field,
19 body_text: Field,
20 attachment_filenames: Field,
21 labels: Field,
22 is_read: Field,
23 is_starred: Field,
24 is_draft: Field,
25 is_sent: Field,
26 is_trash: Field,
27 is_spam: Field,
28 is_answered: Field,
29 has_attachments: Field,
30}
31
32impl QueryBuilder {
33 pub fn new(schema: &MxrSchema) -> Self {
34 Self {
35 subject: schema.subject,
36 from_name: schema.from_name,
37 from_email: schema.from_email,
38 to_email: schema.to_email,
39 cc_email: schema.cc_email,
40 bcc_email: schema.bcc_email,
41 snippet: schema.snippet,
42 body_text: schema.body_text,
43 attachment_filenames: schema.attachment_filenames,
44 labels: schema.labels,
45 is_read: schema.is_read,
46 is_starred: schema.is_starred,
47 is_draft: schema.is_draft,
48 is_sent: schema.is_sent,
49 is_trash: schema.is_trash,
50 is_spam: schema.is_spam,
51 is_answered: schema.is_answered,
52 has_attachments: schema.has_attachments,
53 }
54 }
55
56 pub fn build(&self, node: &QueryNode) -> Box<dyn Query> {
57 match node {
58 QueryNode::Text(text) => self.build_text_query(text),
59 QueryNode::Phrase(phrase) => self.build_phrase_query(phrase),
60 QueryNode::Field { field, value } => self.build_field_query(field, value),
61 QueryNode::Filter(filter) => self.build_filter_query(filter),
62 QueryNode::Label(label) => self.build_label_query(label),
63 QueryNode::DateRange { bound, date } => self.build_date_query(bound, date),
64 QueryNode::Size { op, bytes } => self.build_size_query(op, *bytes),
65 QueryNode::And(left, right) => {
66 let left_q = self.build(left);
67 let right_q = self.build(right);
68 Box::new(BooleanQuery::new(vec![
69 (Occur::Must, left_q),
70 (Occur::Must, right_q),
71 ]))
72 }
73 QueryNode::Or(left, right) => {
74 let left_q = self.build(left);
75 let right_q = self.build(right);
76 Box::new(BooleanQuery::new(vec![
77 (Occur::Should, left_q),
78 (Occur::Should, right_q),
79 ]))
80 }
81 QueryNode::Not(inner) => {
82 let inner_q = self.build(inner);
83 Box::new(BooleanQuery::new(vec![
84 (Occur::MustNot, inner_q),
85 (Occur::Should, Box::new(AllQuery)),
87 ]))
88 }
89 }
90 }
91
92 fn build_text_query(&self, text: &str) -> Box<dyn Query> {
93 let fields_boosts: Vec<(Field, f32)> = vec![
94 (self.subject, 3.0),
95 (self.from_name, 2.0),
96 (self.from_email, 2.0),
97 (self.snippet, 1.0),
98 (self.body_text, 0.5),
99 (self.attachment_filenames, 0.75),
100 ];
101
102 let tokens = tokenize_text_value(text);
103 if tokens.is_empty() {
104 return self.build_text_token_query(&fields_boosts, &text.to_lowercase());
105 }
106 if tokens.len() == 1 {
107 return self.build_text_token_query(&fields_boosts, &tokens[0]);
108 }
109
110 let token_groups = tokens
111 .into_iter()
112 .map(|token| {
113 (
114 Occur::Must,
115 self.build_text_token_query(&fields_boosts, &token),
116 )
117 })
118 .collect();
119 Box::new(BooleanQuery::new(token_groups))
120 }
121
122 fn build_phrase_query(&self, phrase: &str) -> Box<dyn Query> {
123 let terms: Vec<Term> = phrase
124 .split_whitespace()
125 .map(|w| Term::from_field_text(self.subject, &w.to_lowercase()))
126 .collect();
127
128 if terms.len() == 1 {
129 let tq = TermQuery::new(
130 terms.into_iter().next().unwrap(),
131 IndexRecordOption::WithFreqs,
132 );
133 return Box::new(BoostQuery::new(Box::new(tq), 3.0));
134 }
135
136 let phrase_q = PhraseQuery::new(terms);
137 Box::new(BoostQuery::new(Box::new(phrase_q), 3.0))
138 }
139
140 fn build_field_query(&self, field: &QueryField, value: &str) -> Box<dyn Query> {
141 let tantivy_field = match field {
142 QueryField::From => self.from_email,
143 QueryField::To => self.to_email,
144 QueryField::Cc => self.cc_email,
145 QueryField::Bcc => self.bcc_email,
146 QueryField::Subject => self.subject,
147 QueryField::Body => self.body_text,
148 QueryField::Filename => self.attachment_filenames,
149 };
150
151 match field {
152 QueryField::Subject | QueryField::Body | QueryField::Filename => {
153 self.build_text_field_query(tantivy_field, value)
154 }
155 QueryField::From | QueryField::To | QueryField::Cc | QueryField::Bcc => {
156 let term = Term::from_field_text(tantivy_field, value);
157 Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs))
158 }
159 }
160 }
161
162 fn build_filter_query(&self, filter: &FilterKind) -> Box<dyn Query> {
163 match filter {
164 FilterKind::Read => {
165 let term = Term::from_field_bool(self.is_read, true);
166 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
167 }
168 FilterKind::Unread => {
169 let term = Term::from_field_bool(self.is_read, false);
170 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
171 }
172 FilterKind::Starred => {
173 let term = Term::from_field_bool(self.is_starred, true);
174 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
175 }
176 FilterKind::Draft => {
177 let term = Term::from_field_bool(self.is_draft, true);
178 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
179 }
180 FilterKind::Sent => {
181 let term = Term::from_field_bool(self.is_sent, true);
182 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
183 }
184 FilterKind::Trash => {
185 let term = Term::from_field_bool(self.is_trash, true);
186 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
187 }
188 FilterKind::Spam => {
189 let term = Term::from_field_bool(self.is_spam, true);
190 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
191 }
192 FilterKind::Answered => {
193 let term = Term::from_field_bool(self.is_answered, true);
194 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
195 }
196 FilterKind::Inbox => self.build_label_query("INBOX"),
197 FilterKind::Archived => Box::new(BooleanQuery::new(vec![
198 (Occur::Should, self.build_label_query("ARCHIVE")),
199 (
200 Occur::Should,
201 Box::new(BooleanQuery::new(vec![
202 (Occur::MustNot, self.build_label_query("INBOX")),
203 (Occur::MustNot, self.build_filter_query(&FilterKind::Sent)),
204 (Occur::MustNot, self.build_filter_query(&FilterKind::Draft)),
205 (Occur::MustNot, self.build_filter_query(&FilterKind::Trash)),
206 (Occur::MustNot, self.build_filter_query(&FilterKind::Spam)),
207 (Occur::Should, Box::new(AllQuery)),
208 ])),
209 ),
210 ])),
211 FilterKind::HasAttachment => {
212 let term = Term::from_field_bool(self.has_attachments, true);
213 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
214 }
215 }
216 }
217
218 fn build_label_query(&self, label: &str) -> Box<dyn Query> {
219 let term = Term::from_field_text(self.labels, &label.to_lowercase());
220 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
221 }
222
223 fn build_date_query(&self, bound: &DateBound, date_val: &DateValue) -> Box<dyn Query> {
224 let resolved = resolve_date(date_val);
225 let field_name = "date".to_string();
226 let start = self.date_to_tantivy(resolved);
227
228 match bound {
229 DateBound::After => Box::new(RangeQuery::new_date_bounds(
230 field_name,
231 Bound::Included(start),
232 Bound::Unbounded,
233 )),
234 DateBound::Before => Box::new(RangeQuery::new_date_bounds(
235 field_name,
236 Bound::Unbounded,
237 Bound::Excluded(start),
238 )),
239 DateBound::Exact => {
240 let end_date = resolved.succ_opt().unwrap_or(resolved);
241 let end = self.date_to_tantivy(end_date);
242 Box::new(RangeQuery::new_date_bounds(
243 field_name,
244 Bound::Included(start),
245 Bound::Excluded(end),
246 ))
247 }
248 }
249 }
250
251 fn build_size_query(&self, op: &SizeOp, bytes: u64) -> Box<dyn Query> {
252 let field_name = "size_bytes".to_string();
253 match op {
254 SizeOp::LessThan => Box::new(RangeQuery::new_u64_bounds(
255 field_name,
256 Bound::Unbounded,
257 Bound::Excluded(bytes),
258 )),
259 SizeOp::LessThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
260 field_name,
261 Bound::Unbounded,
262 Bound::Included(bytes),
263 )),
264 SizeOp::Equal => Box::new(RangeQuery::new_u64_bounds(
265 field_name,
266 Bound::Included(bytes),
267 Bound::Included(bytes),
268 )),
269 SizeOp::GreaterThan => Box::new(RangeQuery::new_u64_bounds(
270 field_name,
271 Bound::Excluded(bytes),
272 Bound::Unbounded,
273 )),
274 SizeOp::GreaterThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
275 field_name,
276 Bound::Included(bytes),
277 Bound::Unbounded,
278 )),
279 }
280 }
281
282 fn build_text_field_query(&self, field: Field, value: &str) -> Box<dyn Query> {
283 let terms: Vec<Term> = tokenize_text_value(value)
284 .into_iter()
285 .map(|word| Term::from_field_text(field, &word))
286 .collect();
287
288 if terms.len() <= 1 {
289 let term = terms
290 .into_iter()
291 .next()
292 .unwrap_or_else(|| Term::from_field_text(field, &value.to_lowercase()));
293 return Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
294 }
295
296 Box::new(PhraseQuery::new(terms))
297 }
298
299 fn build_text_token_query(
300 &self,
301 fields_boosts: &[(Field, f32)],
302 token: &str,
303 ) -> Box<dyn Query> {
304 let subqueries = fields_boosts
305 .iter()
306 .map(|(field, boost)| {
307 let term = Term::from_field_text(*field, token);
308 let tq = TermQuery::new(term, IndexRecordOption::WithFreqs);
309 let boosted: Box<dyn Query> = Box::new(BoostQuery::new(Box::new(tq), *boost));
310 (Occur::Should, boosted)
311 })
312 .collect();
313 Box::new(BooleanQuery::new(subqueries))
314 }
315
316 fn date_to_tantivy(&self, date: NaiveDate) -> tantivy::DateTime {
317 let dt = date.and_hms_opt(0, 0, 0).unwrap();
318 let ts = dt.and_utc().timestamp();
319 tantivy::DateTime::from_timestamp_secs(ts)
320 }
321}
322
323fn resolve_date(date_val: &DateValue) -> NaiveDate {
324 let today = Local::now().date_naive();
325 match date_val {
326 DateValue::Specific(d) => *d,
327 DateValue::Today => today,
328 DateValue::Yesterday => today.pred_opt().unwrap_or(today),
329 DateValue::ThisWeek => {
330 let weekday = today.weekday().num_days_from_monday();
331 today - chrono::Duration::days(weekday as i64)
332 }
333 DateValue::ThisMonth => {
334 NaiveDate::from_ymd_opt(today.year(), today.month(), 1).unwrap_or(today)
335 }
336 }
337}
338
/// Splits `value` into lowercase tokens.
///
/// Every non-alphanumeric character acts as a separator; empty pieces between
/// consecutive separators are discarded.
fn tokenize_text_value(value: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    for piece in value.split(|ch: char| !ch.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        tokens.push(piece.to_lowercase());
    }
    tokens
}
346
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::SearchIndex;
    use crate::parser::parse_query;
    use mxr_core::id::*;
    use mxr_core::types::*;

    /// Builds an envelope with fresh ids, a fixed recipient and the given
    /// subject, sender, flags and attachment marker.
    fn make_test_envelope(
        subject: &str,
        from_email: &str,
        from_name: &str,
        flags: MessageFlags,
        has_attachments: bool,
    ) -> Envelope {
        let sender = Address {
            name: Some(from_name.to_string()),
            email: from_email.to_string(),
        };
        let recipient = Address {
            name: None,
            email: "recipient@example.com".to_string(),
        };

        Envelope {
            id: MessageId::new(),
            account_id: AccountId::new(),
            provider_id: format!("fake-{}", subject.len()),
            thread_id: ThreadId::new(),
            message_id_header: None,
            in_reply_to: None,
            references: vec![],
            from: sender,
            to: vec![recipient],
            cc: vec![],
            bcc: vec![],
            subject: subject.to_string(),
            date: chrono::Utc::now(),
            flags,
            snippet: format!("Snippet for {}", subject),
            has_attachments,
            size_bytes: 1000,
            unsubscribe: UnsubscribeMethod::None,
            label_provider_ids: vec![],
        }
    }

    /// Creates an in-memory index holding four committed envelopes and
    /// returns it together with those envelopes, in insertion order.
    fn build_test_index() -> (SearchIndex, Vec<Envelope>) {
        let fixtures = vec![
            (
                "Deployment plan for v2",
                "alice@example.com",
                "Alice",
                MessageFlags::empty(),
                false,
            ),
            (
                "Invoice #2847",
                "bob@example.com",
                "Bob",
                MessageFlags::READ | MessageFlags::STARRED,
                true,
            ),
            (
                "Team standup notes",
                "carol@example.com",
                "Carol",
                MessageFlags::READ,
                false,
            ),
            (
                "crates.io: Successfully published mxr@0.4.6",
                "noreply@crates.io",
                "crates.io",
                MessageFlags::READ,
                false,
            ),
        ];

        let envelopes: Vec<Envelope> = fixtures
            .into_iter()
            .map(|(subject, email, name, flags, has_attachments)| {
                make_test_envelope(subject, email, name, flags, has_attachments)
            })
            .collect();

        let mut idx = SearchIndex::in_memory().unwrap();
        for envelope in &envelopes {
            idx.index_envelope(envelope).unwrap();
        }
        idx.commit().unwrap();

        (idx, envelopes)
    }

    /// Creates a query builder over a freshly built schema.
    fn test_builder() -> QueryBuilder {
        QueryBuilder::new(&MxrSchema::build())
    }

    /// A single-word text query finds the one envelope whose subject has it.
    #[test]
    fn build_text_query_with_boosts() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let query = qb.build(&QueryNode::Text("deployment".to_string()));
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[0].id.as_str());
    }

    /// A `from` field query matches the sender address exactly.
    #[test]
    fn build_field_query() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let from_alice = QueryNode::Field {
            field: QueryField::From,
            value: "alice@example.com".to_string(),
        };
        let query = qb.build(&from_alice);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[0].id.as_str());
    }

    /// Only one fixture envelope lacks the READ flag.
    #[test]
    fn build_filter_query() {
        let (idx, _envelopes) = build_test_index();
        let qb = test_builder();

        let query = qb.build(&QueryNode::Filter(FilterKind::Unread));
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
    }

    /// All fixtures are dated "now", so they all fall after yesterday.
    #[test]
    fn build_date_range_query() {
        let (idx, _envelopes) = build_test_index();
        let qb = test_builder();

        let yesterday = Local::now().date_naive().pred_opt().unwrap();
        let after_yesterday = QueryNode::DateRange {
            bound: DateBound::After,
            date: DateValue::Specific(yesterday),
        };
        let query = qb.build(&after_yesterday);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 4);
    }

    /// `And` requires both the sender match and the starred flag.
    #[test]
    fn build_compound_query() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let from_bob = QueryNode::Field {
            field: QueryField::From,
            value: "bob@example.com".to_string(),
        };
        let starred = QueryNode::Filter(FilterKind::Starred);
        let both = QueryNode::And(Box::new(from_bob), Box::new(starred));

        let query = qb.build(&both);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[1].id.as_str());
    }

    /// A query string run through the parser yields the same hit as the
    /// hand-built field node.
    #[test]
    fn search_with_parsed_query() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let ast = parse_query("from:alice@example.com").unwrap();
        let query = qb.build(&ast);
        let found = idx.search_ast(query, 10, 0, SortOrder::Relevance).unwrap();

        assert_eq!(found.results.len(), 1);
        assert_eq!(found.results[0].message_id, envelopes[0].id.as_str());
    }

    /// Text containing dots and `@` is split into tokens that still find the
    /// crates.io envelope.
    #[test]
    fn build_text_query_tokenizes_punctuation_heavy_terms() {
        let (idx, envelopes) = build_test_index();
        let qb = test_builder();

        let crates_ast = parse_query("crates.io").unwrap();
        let crates_found = idx
            .search_ast(qb.build(&crates_ast), 10, 0, SortOrder::Relevance)
            .unwrap();
        assert_eq!(crates_found.results.len(), 1);
        assert_eq!(
            crates_found.results[0].message_id,
            envelopes[3].id.as_str()
        );

        let version_ast = parse_query("mxr@0.4.6").unwrap();
        let version_found = idx
            .search_ast(qb.build(&version_ast), 10, 0, SortOrder::Relevance)
            .unwrap();
        assert_eq!(version_found.results.len(), 1);
        assert_eq!(
            version_found.results[0].message_id,
            envelopes[3].id.as_str()
        );
    }
}