1use crate::ast::*;
2use crate::schema::MxrSchema;
3use chrono::{Datelike, Local, NaiveDate};
4use std::ops::Bound;
5use tantivy::query::{AllQuery, BooleanQuery, BoostQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery};
6use tantivy::schema::{Field, IndexRecordOption};
7use tantivy::Term;
8
9pub struct QueryBuilder {
10 subject: Field,
11 from_name: Field,
12 from_email: Field,
13 to_email: Field,
14 cc_email: Field,
15 bcc_email: Field,
16 snippet: Field,
17 body_text: Field,
18 attachment_filenames: Field,
19 labels: Field,
20 is_read: Field,
21 is_starred: Field,
22 is_draft: Field,
23 is_sent: Field,
24 is_trash: Field,
25 is_spam: Field,
26 is_answered: Field,
27 has_attachments: Field,
28}
29
30impl QueryBuilder {
31 pub fn new(schema: &MxrSchema) -> Self {
32 Self {
33 subject: schema.subject,
34 from_name: schema.from_name,
35 from_email: schema.from_email,
36 to_email: schema.to_email,
37 cc_email: schema.cc_email,
38 bcc_email: schema.bcc_email,
39 snippet: schema.snippet,
40 body_text: schema.body_text,
41 attachment_filenames: schema.attachment_filenames,
42 labels: schema.labels,
43 is_read: schema.is_read,
44 is_starred: schema.is_starred,
45 is_draft: schema.is_draft,
46 is_sent: schema.is_sent,
47 is_trash: schema.is_trash,
48 is_spam: schema.is_spam,
49 is_answered: schema.is_answered,
50 has_attachments: schema.has_attachments,
51 }
52 }
53
54 pub fn build(&self, node: &QueryNode) -> Box<dyn Query> {
55 match node {
56 QueryNode::Text(text) => self.build_text_query(text),
57 QueryNode::Phrase(phrase) => self.build_phrase_query(phrase),
58 QueryNode::Field { field, value } => self.build_field_query(field, value),
59 QueryNode::Filter(filter) => self.build_filter_query(filter),
60 QueryNode::Label(label) => self.build_label_query(label),
61 QueryNode::DateRange { bound, date } => self.build_date_query(bound, date),
62 QueryNode::Size { op, bytes } => self.build_size_query(op, *bytes),
63 QueryNode::And(left, right) => {
64 let left_q = self.build(left);
65 let right_q = self.build(right);
66 Box::new(BooleanQuery::new(vec![
67 (Occur::Must, left_q),
68 (Occur::Must, right_q),
69 ]))
70 }
71 QueryNode::Or(left, right) => {
72 let left_q = self.build(left);
73 let right_q = self.build(right);
74 Box::new(BooleanQuery::new(vec![
75 (Occur::Should, left_q),
76 (Occur::Should, right_q),
77 ]))
78 }
79 QueryNode::Not(inner) => {
80 let inner_q = self.build(inner);
81 Box::new(BooleanQuery::new(vec![
82 (Occur::MustNot, inner_q),
83 (Occur::Should, Box::new(AllQuery)),
85 ]))
86 }
87 }
88 }
89
90 fn build_text_query(&self, text: &str) -> Box<dyn Query> {
91 let fields_boosts: Vec<(Field, f32)> = vec![
92 (self.subject, 3.0),
93 (self.from_name, 2.0),
94 (self.from_email, 2.0),
95 (self.snippet, 1.0),
96 (self.body_text, 0.5),
97 ];
98
99 let text_lower = text.to_lowercase();
100 let subqueries: Vec<(Occur, Box<dyn Query>)> = fields_boosts
101 .into_iter()
102 .map(|(field, boost)| {
103 let term = Term::from_field_text(field, &text_lower);
104 let tq = TermQuery::new(term, IndexRecordOption::WithFreqs);
105 let boosted: Box<dyn Query> = Box::new(BoostQuery::new(Box::new(tq), boost));
106 (Occur::Should, boosted)
107 })
108 .collect();
109
110 Box::new(BooleanQuery::new(subqueries))
111 }
112
113 fn build_phrase_query(&self, phrase: &str) -> Box<dyn Query> {
114 let terms: Vec<Term> = phrase
115 .split_whitespace()
116 .map(|w| Term::from_field_text(self.subject, &w.to_lowercase()))
117 .collect();
118
119 if terms.len() == 1 {
120 let tq = TermQuery::new(
121 terms.into_iter().next().unwrap(),
122 IndexRecordOption::WithFreqs,
123 );
124 return Box::new(BoostQuery::new(Box::new(tq), 3.0));
125 }
126
127 let phrase_q = PhraseQuery::new(terms);
128 Box::new(BoostQuery::new(Box::new(phrase_q), 3.0))
129 }
130
131 fn build_field_query(&self, field: &QueryField, value: &str) -> Box<dyn Query> {
132 let tantivy_field = match field {
133 QueryField::From => self.from_email,
134 QueryField::To => self.to_email,
135 QueryField::Cc => self.cc_email,
136 QueryField::Bcc => self.bcc_email,
137 QueryField::Subject => self.subject,
138 QueryField::Body => self.body_text,
139 QueryField::Filename => self.attachment_filenames,
140 };
141
142 match field {
143 QueryField::Subject | QueryField::Body | QueryField::Filename => {
144 self.build_text_field_query(tantivy_field, value)
145 }
146 QueryField::From | QueryField::To | QueryField::Cc | QueryField::Bcc => {
147 let term = Term::from_field_text(tantivy_field, value);
148 Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs))
149 }
150 }
151 }
152
153 fn build_filter_query(&self, filter: &FilterKind) -> Box<dyn Query> {
154 match filter {
155 FilterKind::Read => {
156 let term = Term::from_field_bool(self.is_read, true);
157 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
158 }
159 FilterKind::Unread => {
160 let term = Term::from_field_bool(self.is_read, false);
161 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
162 }
163 FilterKind::Starred => {
164 let term = Term::from_field_bool(self.is_starred, true);
165 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
166 }
167 FilterKind::Draft => {
168 let term = Term::from_field_bool(self.is_draft, true);
169 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
170 }
171 FilterKind::Sent => {
172 let term = Term::from_field_bool(self.is_sent, true);
173 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
174 }
175 FilterKind::Trash => {
176 let term = Term::from_field_bool(self.is_trash, true);
177 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
178 }
179 FilterKind::Spam => {
180 let term = Term::from_field_bool(self.is_spam, true);
181 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
182 }
183 FilterKind::Answered => {
184 let term = Term::from_field_bool(self.is_answered, true);
185 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
186 }
187 FilterKind::Inbox => self.build_label_query("INBOX"),
188 FilterKind::Archived => Box::new(BooleanQuery::new(vec![
189 (Occur::Should, self.build_label_query("ARCHIVE")),
190 (Occur::Should, Box::new(BooleanQuery::new(vec![
191 (Occur::MustNot, self.build_label_query("INBOX")),
192 (Occur::MustNot, self.build_filter_query(&FilterKind::Sent)),
193 (Occur::MustNot, self.build_filter_query(&FilterKind::Draft)),
194 (Occur::MustNot, self.build_filter_query(&FilterKind::Trash)),
195 (Occur::MustNot, self.build_filter_query(&FilterKind::Spam)),
196 (Occur::Should, Box::new(AllQuery)),
197 ]))),
198 ])),
199 FilterKind::HasAttachment => {
200 let term = Term::from_field_bool(self.has_attachments, true);
201 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
202 }
203 }
204 }
205
206 fn build_label_query(&self, label: &str) -> Box<dyn Query> {
207 let term = Term::from_field_text(self.labels, &label.to_lowercase());
208 Box::new(TermQuery::new(term, IndexRecordOption::Basic))
209 }
210
211 fn build_date_query(&self, bound: &DateBound, date_val: &DateValue) -> Box<dyn Query> {
212 let resolved = resolve_date(date_val);
213 let field_name = "date".to_string();
214 let start = self.date_to_tantivy(resolved);
215
216 match bound {
217 DateBound::After => Box::new(RangeQuery::new_date_bounds(
218 field_name,
219 Bound::Included(start),
220 Bound::Unbounded,
221 )),
222 DateBound::Before => Box::new(RangeQuery::new_date_bounds(
223 field_name,
224 Bound::Unbounded,
225 Bound::Excluded(start),
226 )),
227 DateBound::Exact => {
228 let end_date = resolved.succ_opt().unwrap_or(resolved);
229 let end = self.date_to_tantivy(end_date);
230 Box::new(RangeQuery::new_date_bounds(
231 field_name,
232 Bound::Included(start),
233 Bound::Excluded(end),
234 ))
235 }
236 }
237 }
238
239 fn build_size_query(&self, op: &SizeOp, bytes: u64) -> Box<dyn Query> {
240 let field_name = "size_bytes".to_string();
241 match op {
242 SizeOp::LessThan => Box::new(RangeQuery::new_u64_bounds(
243 field_name,
244 Bound::Unbounded,
245 Bound::Excluded(bytes),
246 )),
247 SizeOp::LessThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
248 field_name,
249 Bound::Unbounded,
250 Bound::Included(bytes),
251 )),
252 SizeOp::Equal => Box::new(RangeQuery::new_u64_bounds(
253 field_name,
254 Bound::Included(bytes),
255 Bound::Included(bytes),
256 )),
257 SizeOp::GreaterThan => Box::new(RangeQuery::new_u64_bounds(
258 field_name,
259 Bound::Excluded(bytes),
260 Bound::Unbounded,
261 )),
262 SizeOp::GreaterThanOrEqual => Box::new(RangeQuery::new_u64_bounds(
263 field_name,
264 Bound::Included(bytes),
265 Bound::Unbounded,
266 )),
267 }
268 }
269
270 fn build_text_field_query(&self, field: Field, value: &str) -> Box<dyn Query> {
271 let terms: Vec<Term> = tokenize_text_value(value)
272 .into_iter()
273 .map(|word| Term::from_field_text(field, &word))
274 .collect();
275
276 if terms.len() <= 1 {
277 let term = terms
278 .into_iter()
279 .next()
280 .unwrap_or_else(|| Term::from_field_text(field, &value.to_lowercase()));
281 return Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
282 }
283
284 Box::new(PhraseQuery::new(terms))
285 }
286
287 fn date_to_tantivy(&self, date: NaiveDate) -> tantivy::DateTime {
288 let dt = date.and_hms_opt(0, 0, 0).unwrap();
289 let ts = dt.and_utc().timestamp();
290 tantivy::DateTime::from_timestamp_secs(ts)
291 }
292}
293
294fn resolve_date(date_val: &DateValue) -> NaiveDate {
295 let today = Local::now().date_naive();
296 match date_val {
297 DateValue::Specific(d) => *d,
298 DateValue::Today => today,
299 DateValue::Yesterday => today.pred_opt().unwrap_or(today),
300 DateValue::ThisWeek => {
301 let weekday = today.weekday().num_days_from_monday();
302 today - chrono::Duration::days(weekday as i64)
303 }
304 DateValue::ThisMonth => {
305 NaiveDate::from_ymd_opt(today.year(), today.month(), 1).unwrap_or(today)
306 }
307 }
308}
309
/// Splits `value` into lowercase tokens.
///
/// Every non-alphanumeric character acts as a separator; empty pieces
/// produced by adjacent separators are dropped.
fn tokenize_text_value(value: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    for piece in value.split(|ch: char| !ch.is_alphanumeric()) {
        if !piece.is_empty() {
            tokens.push(piece.to_lowercase());
        }
    }
    tokens
}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::SearchIndex;
    use crate::parser::parse_query;
    use mxr_core::id::*;
    use mxr_core::types::*;

    /// Creates an envelope with fresh ids, one fixed recipient, the current
    /// UTC time as its date and a size of 1000 bytes.
    fn make_test_envelope(
        subject: &str,
        from_email: &str,
        from_name: &str,
        flags: MessageFlags,
        has_attachments: bool,
    ) -> Envelope {
        let sender = Address {
            name: Some(from_name.to_string()),
            email: from_email.to_string(),
        };
        let recipient = Address {
            name: None,
            email: "recipient@example.com".to_string(),
        };

        Envelope {
            id: MessageId::new(),
            account_id: AccountId::new(),
            provider_id: format!("fake-{}", subject.len()),
            thread_id: ThreadId::new(),
            message_id_header: None,
            in_reply_to: None,
            references: vec![],
            from: sender,
            to: vec![recipient],
            cc: vec![],
            bcc: vec![],
            subject: subject.to_string(),
            date: chrono::Utc::now(),
            flags,
            snippet: format!("Snippet for {}", subject),
            has_attachments,
            size_bytes: 1000,
            unsubscribe: UnsubscribeMethod::None,
            label_provider_ids: vec![],
        }
    }

    /// Builds an in-memory index holding three messages:
    /// an unread one from Alice, a read + starred one with attachments from
    /// Bob, and a read one from Carol.
    fn build_test_index() -> (SearchIndex, Vec<Envelope>) {
        let mut idx = SearchIndex::in_memory().unwrap();

        // (subject, sender email, sender name, flags, has attachments)
        let specs = vec![
            (
                "Deployment plan for v2",
                "alice@example.com",
                "Alice",
                MessageFlags::empty(),
                false,
            ),
            (
                "Invoice #2847",
                "bob@example.com",
                "Bob",
                MessageFlags::READ | MessageFlags::STARRED,
                true,
            ),
            (
                "Team standup notes",
                "carol@example.com",
                "Carol",
                MessageFlags::READ,
                false,
            ),
        ];

        let mut envelopes = Vec::with_capacity(specs.len());
        for (subject, email, name, flags, attached) in specs {
            envelopes.push(make_test_envelope(subject, email, name, flags, attached));
        }

        for envelope in &envelopes {
            idx.index_envelope(envelope).unwrap();
        }
        idx.commit().unwrap();

        (idx, envelopes)
    }

    /// Shared setup: the populated index, its envelopes, and a builder over a
    /// freshly built schema.
    fn fixture() -> (SearchIndex, Vec<Envelope>, QueryBuilder) {
        let (idx, envelopes) = build_test_index();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);
        (idx, envelopes, qb)
    }

    #[test]
    fn build_text_query_with_boosts() {
        let (idx, envelopes, qb) = fixture();

        let query = qb.build(&QueryNode::Text("deployment".to_string()));
        let results = idx.search_ast(query, 10).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[0].id.as_str());
    }

    #[test]
    fn build_field_query() {
        let (idx, envelopes, qb) = fixture();

        let from_alice = QueryNode::Field {
            field: QueryField::From,
            value: "alice@example.com".to_string(),
        };
        let results = idx.search_ast(qb.build(&from_alice), 10).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[0].id.as_str());
    }

    #[test]
    fn build_filter_query() {
        let (idx, _envelopes, qb) = fixture();

        let unread = QueryNode::Filter(FilterKind::Unread);
        let results = idx.search_ast(qb.build(&unread), 10).unwrap();

        // Only the first fixture message has no READ flag.
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn build_date_range_query() {
        let (idx, _envelopes, qb) = fixture();

        let yesterday = Local::now().date_naive().pred_opt().unwrap();
        let since_yesterday = QueryNode::DateRange {
            bound: DateBound::After,
            date: DateValue::Specific(yesterday),
        };
        let results = idx.search_ast(qb.build(&since_yesterday), 10).unwrap();

        // Every fixture message is dated "now", so all three qualify.
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn build_compound_query() {
        let (idx, envelopes, qb) = fixture();

        let from_bob = QueryNode::Field {
            field: QueryField::From,
            value: "bob@example.com".to_string(),
        };
        let starred = QueryNode::Filter(FilterKind::Starred);
        let both = QueryNode::And(Box::new(from_bob), Box::new(starred));

        let results = idx.search_ast(qb.build(&both), 10).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[1].id.as_str());
    }

    #[test]
    fn search_with_parsed_query() {
        let (idx, envelopes, qb) = fixture();

        let ast = parse_query("from:alice@example.com").unwrap();
        let results = idx.search_ast(qb.build(&ast), 10).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].message_id, envelopes[0].id.as_str());
    }
}