1use std::ops::Bound;
29
30use tantivy::{
31 query::{
32 AllQuery, BooleanQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery,
33 },
34 schema::{IndexRecordOption, Schema},
35 Term,
36};
37
/// Message field that a [`SearchCondition`] is evaluated against.
///
/// Each variant is mapped to an index field name by `field_name_for`,
/// except [`SearchField::FullText`], which text conditions fan out over
/// several index fields.
#[derive(Clone, Debug)]
pub enum SearchField {
    /// Searched in the `subject` index field.
    Subject,
    /// Searched in the `from` index field.
    From,
    /// Searched in the `to` index field.
    To,
    /// Searched in the `body` index field.
    Body,
    /// An arbitrary header; searched in the shared `header_values` index
    /// field. The carried header name is not used when resolving the field.
    Header(String),
    /// Searched in the shared `header_values` index field.
    Cc,
    /// Searched in the shared `header_values` index field.
    Bcc,
    /// Searched across `subject`, `body` and `header_values` at once.
    FullText,
    /// Searched in the `attachment_filenames` index field.
    AttachmentFilenames,
}
62
/// How the value of a [`SearchCondition`] is compared against the index.
///
/// The date variants carry an `i64` timestamp that is compared with the
/// indexed `date` field (the one-day window used by `DateOn` is 86 400 units
/// wide, i.e. the timestamp is presumably in seconds).
#[derive(Clone, Debug)]
pub enum SearchComparator {
    /// Text match; the string is parsed with [`parse_search_term`].
    Contains(String),
    /// Translated exactly like [`SearchComparator::Contains`].
    Equals(String),
    /// `date >= timestamp` (inclusive lower bound).
    DateSince(i64),
    /// `date < timestamp` (exclusive upper bound).
    DateBefore(i64),
    /// `timestamp <= date < timestamp + 86_400`.
    DateOn(i64),
}
77
78#[derive(Debug, Clone)]
80pub struct SearchCondition {
81 pub field: SearchField,
82 pub comparator: SearchComparator,
83}
84
85#[derive(Debug, Clone)]
90pub enum SearchQuery {
91 Condition(SearchCondition),
93 And(Vec<SearchQuery>),
95 Or(Vec<SearchQuery>),
97 Not(Box<SearchQuery>),
99 All,
101 None,
103}
104
/// Flat filter consumed by [`jmap_filter_to_tantivy`].
///
/// Every populated field contributes one mandatory clause; `None` and empty
/// strings are skipped. The default value has every field unset.
#[derive(Clone, Debug, Default)]
pub struct JmapSearchFilter {
    /// Full-text search across `subject`, `body` and `header_values`.
    pub text: Option<String>,
    /// Text to find in the `from` index field.
    pub from: Option<String>,
    /// Text to find in the `to` index field.
    pub to: Option<String>,
    /// Text to find in the shared `header_values` index field.
    pub cc: Option<String>,
    /// Text to find in the shared `header_values` index field.
    pub bcc: Option<String>,
    /// Text to find in the `subject` index field.
    pub subject: Option<String>,
    /// Text to find in the `body` index field.
    pub body: Option<String>,
    /// Exclusive upper bound on the `date` index field.
    pub before: Option<i64>,
    /// Inclusive lower bound on the `date` index field.
    pub after: Option<i64>,
}
133
/// Result of classifying a raw search string with [`parse_search_term`].
#[derive(Clone, Debug)]
pub enum TermKind {
    /// A single lower-cased word (may be empty for blank input).
    Exact(String),
    /// An ordered sequence of lower-cased words that must appear adjacently.
    Phrase(Vec<String>),
    /// A word to be matched approximately (`word~N` syntax).
    Fuzzy {
        /// The lower-cased text before the `~`.
        term: String,
        /// The single-digit edit distance written after the `~`.
        distance: u8,
    },
}
152
153pub fn parse_search_term(s: &str) -> TermKind {
165 let trimmed = s.trim();
166
167 if trimmed.starts_with('"') && trimmed.ends_with('"') && trimmed.len() >= 2 {
169 let inner = &trimmed[1..trimmed.len() - 1];
170 let tokens: Vec<String> = inner
171 .split_whitespace()
172 .filter(|t| !t.is_empty())
173 .map(|t| t.to_lowercase())
174 .collect();
175 if !tokens.is_empty() {
176 return TermKind::Phrase(tokens);
177 }
178 }
180
181 if let Some((base, dist_str)) = trimmed.rsplit_once('~') {
183 if dist_str.len() == 1 {
184 if let Ok(dist) = dist_str.parse::<u8>() {
185 if !base.is_empty() {
186 return TermKind::Fuzzy {
187 term: base.to_lowercase(),
188 distance: dist,
189 };
190 }
191 }
192 }
193 }
194
195 let lower = trimmed.to_lowercase();
197 let words: Vec<&str> = lower.split_whitespace().collect();
198 if words.len() > 1 {
199 return TermKind::Phrase(words.into_iter().map(String::from).collect());
201 }
202
203 TermKind::Exact(lower)
204}
205
206fn resolve_field(schema: &Schema, name: &str) -> Option<tantivy::schema::Field> {
213 schema.get_field(name).ok()
214}
215
216fn build_text_query(schema: &Schema, field_name: &str, value: &str) -> Option<Box<dyn Query>> {
222 let field = resolve_field(schema, field_name)?;
223 match parse_search_term(value) {
224 TermKind::Exact(word) if !word.is_empty() => {
225 let term = Term::from_field_text(field, &word);
226 Some(Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)))
227 }
228 TermKind::Phrase(tokens) if !tokens.is_empty() => {
229 if tokens.len() == 1 {
230 let term = Term::from_field_text(field, &tokens[0]);
231 Some(Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)))
232 } else {
233 let terms: Vec<Term> = tokens
234 .iter()
235 .map(|t| Term::from_field_text(field, t))
236 .collect();
237 Some(Box::new(PhraseQuery::new(terms)))
238 }
239 }
240 TermKind::Fuzzy { term, distance } if !term.is_empty() => {
241 let t = Term::from_field_text(field, &term);
242 Some(Box::new(FuzzyTermQuery::new(t, distance, true)))
243 }
244 _ => None,
245 }
246}
247
248fn build_fulltext_query(schema: &Schema, value: &str) -> Box<dyn Query> {
251 let field_names = ["subject", "body", "header_values"];
252 let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
253
254 for name in &field_names {
255 if let Some(q) = build_text_query(schema, name, value) {
256 clauses.push((Occur::Should, q));
257 }
258 }
259
260 if clauses.is_empty() {
261 Box::new(AllQuery)
263 } else {
264 Box::new(BooleanQuery::union_with_minimum_required_clauses(
265 clauses
266 .into_iter()
267 .map(|(_, q)| q)
268 .collect::<Vec<Box<dyn Query>>>(),
269 1,
270 ))
271 }
272}
273
274fn field_name_for(field: &SearchField) -> &'static str {
276 match field {
277 SearchField::Subject => "subject",
278 SearchField::From => "from",
279 SearchField::To => "to",
280 SearchField::Body => "body",
281 SearchField::Header(_) | SearchField::Cc | SearchField::Bcc => "header_values",
282 SearchField::FullText => "body", SearchField::AttachmentFilenames => "attachment_filenames",
284 }
285}
286
287pub fn search_query_to_tantivy(query: &SearchQuery, schema: &Schema) -> Box<dyn Query> {
302 match query {
303 SearchQuery::All => Box::new(AllQuery),
304
305 SearchQuery::None => {
306 if let Some(f) = resolve_field(schema, "message_id") {
309 let t = Term::from_field_text(f, "\x00__none__\x00");
310 Box::new(TermQuery::new(t, IndexRecordOption::Basic)) as Box<dyn Query>
311 } else {
312 Box::new(AllQuery) as Box<dyn Query>
313 }
314 }
315
316 SearchQuery::Condition(cond) => translate_condition(cond, schema),
317
318 SearchQuery::And(sub) => {
319 if sub.is_empty() {
320 return Box::new(AllQuery);
321 }
322 let clauses: Vec<(Occur, Box<dyn Query>)> = sub
323 .iter()
324 .map(|q| (Occur::Must, search_query_to_tantivy(q, schema)))
325 .collect();
326 Box::new(BooleanQuery::new(clauses))
327 }
328
329 SearchQuery::Or(sub) => {
330 if sub.is_empty() {
331 return Box::new(AllQuery);
332 }
333 let sub_queries: Vec<Box<dyn Query>> = sub
334 .iter()
335 .map(|q| search_query_to_tantivy(q, schema))
336 .collect();
337 Box::new(BooleanQuery::union_with_minimum_required_clauses(
338 sub_queries,
339 1,
340 ))
341 }
342
343 SearchQuery::Not(inner) => {
344 let positive: Box<dyn Query> = Box::new(AllQuery);
347 let negative = search_query_to_tantivy(inner, schema);
348 Box::new(BooleanQuery::new(vec![
349 (Occur::Must, positive),
350 (Occur::MustNot, negative),
351 ]))
352 }
353 }
354}
355
356fn translate_condition(cond: &SearchCondition, schema: &Schema) -> Box<dyn Query> {
358 match &cond.comparator {
359 SearchComparator::Contains(value) | SearchComparator::Equals(value) => match &cond.field {
360 SearchField::FullText => build_fulltext_query(schema, value),
361 other => {
362 let name = field_name_for(other);
363 build_text_query(schema, name, value).unwrap_or_else(|| Box::new(AllQuery))
364 }
365 },
366
367 SearchComparator::DateSince(ts) => {
368 if let Some(date_field) = resolve_field(schema, "date") {
369 let lower = Term::from_field_i64(date_field, *ts);
370 Box::new(RangeQuery::new(Bound::Included(lower), Bound::Unbounded))
371 } else {
372 Box::new(AllQuery)
373 }
374 }
375
376 SearchComparator::DateBefore(ts) => {
377 if let Some(date_field) = resolve_field(schema, "date") {
378 let upper = Term::from_field_i64(date_field, *ts);
379 Box::new(RangeQuery::new(Bound::Unbounded, Bound::Excluded(upper)))
380 } else {
381 Box::new(AllQuery)
382 }
383 }
384
385 SearchComparator::DateOn(ts) => {
386 if let Some(date_field) = resolve_field(schema, "date") {
388 let lower = Term::from_field_i64(date_field, *ts);
389 let upper = Term::from_field_i64(date_field, ts + 86_400);
390 Box::new(RangeQuery::new(
391 Bound::Included(lower),
392 Bound::Excluded(upper),
393 ))
394 } else {
395 Box::new(AllQuery)
396 }
397 }
398 }
399}
400
401pub fn jmap_filter_to_tantivy(filter: &JmapSearchFilter, schema: &Schema) -> Box<dyn Query> {
411 let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
412
413 if let Some(text) = &filter.text {
415 if !text.is_empty() {
416 clauses.push((Occur::Must, build_fulltext_query(schema, text)));
417 }
418 }
419
420 let field_map: &[(&Option<String>, &str)] = &[
422 (&filter.from, "from"),
423 (&filter.to, "to"),
424 (&filter.subject, "subject"),
425 (&filter.body, "body"),
426 ];
427
428 for (opt, field_name) in field_map {
429 if let Some(val) = opt {
430 if !val.is_empty() {
431 if let Some(q) = build_text_query(schema, field_name, val) {
432 clauses.push((Occur::Must, q));
433 }
434 }
435 }
436 }
437
438 for val in [&filter.cc, &filter.bcc].into_iter().flatten() {
440 if !val.is_empty() {
441 if let Some(q) = build_text_query(schema, "header_values", val) {
442 clauses.push((Occur::Must, q));
443 }
444 }
445 }
446
447 if let (Some(after), Some(before)) = (filter.after, filter.before) {
449 if let Some(date_field) = resolve_field(schema, "date") {
450 let lower = Term::from_field_i64(date_field, after);
451 let upper = Term::from_field_i64(date_field, before);
452 let range: Box<dyn Query> = Box::new(RangeQuery::new(
453 Bound::Included(lower),
454 Bound::Excluded(upper),
455 ));
456 clauses.push((Occur::Must, range));
457 }
458 } else if let Some(after) = filter.after {
459 if let Some(date_field) = resolve_field(schema, "date") {
460 let lower = Term::from_field_i64(date_field, after);
461 let range: Box<dyn Query> =
462 Box::new(RangeQuery::new(Bound::Included(lower), Bound::Unbounded));
463 clauses.push((Occur::Must, range));
464 }
465 } else if let Some(before) = filter.before {
466 if let Some(date_field) = resolve_field(schema, "date") {
467 let upper = Term::from_field_i64(date_field, before);
468 let range: Box<dyn Query> =
469 Box::new(RangeQuery::new(Bound::Unbounded, Bound::Excluded(upper)));
470 clauses.push((Occur::Must, range));
471 }
472 }
473
474 if clauses.is_empty() {
475 Box::new(AllQuery)
476 } else {
477 Box::new(BooleanQuery::new(clauses))
478 }
479}
480
#[cfg(test)]
mod tests {
    use super::*;
    use crate::SearchIndex;
    use bytes::Bytes;
    use rusmes_proto::mail::Mail;
    use rusmes_proto::message::{HeaderMap, MessageBody, MessageId, MimeMessage};

    // ------------------------------------------------------------------
    // Shared helpers
    // ------------------------------------------------------------------

    /// Stand-alone schema carrying the field names the query builders look
    /// up, for tests that do not need a real on-disk index.
    fn make_schema() -> tantivy::schema::Schema {
        use tantivy::schema::{NumericOptions, SchemaBuilder, STORED, TEXT};

        let mut builder = SchemaBuilder::default();
        builder.add_text_field("message_id", STORED);
        builder.add_text_field("from", TEXT | STORED);
        builder.add_text_field("to", TEXT | STORED);
        builder.add_text_field("subject", TEXT | STORED);
        builder.add_text_field("body", TEXT);
        builder.add_text_field("attachment_filenames", TEXT | STORED);
        builder.add_text_field("header_values", TEXT);
        builder.add_i64_field("date", NumericOptions::default().set_indexed().set_stored());
        builder.build()
    }

    /// Parses `input` and returns the word, panicking unless it is `Exact`.
    fn expect_exact(input: &str) -> String {
        match parse_search_term(input) {
            TermKind::Exact(s) => s,
            other => panic!("expected Exact, got {other:?}"),
        }
    }

    /// Parses `input` and returns the tokens, panicking unless it is `Phrase`.
    fn expect_phrase(input: &str) -> Vec<String> {
        match parse_search_term(input) {
            TermKind::Phrase(tokens) => tokens,
            other => panic!("expected Phrase, got {other:?}"),
        }
    }

    /// Parses `input` and returns `(term, distance)`, panicking unless it is
    /// `Fuzzy`.
    fn expect_fuzzy(input: &str) -> (String, u8) {
        match parse_search_term(input) {
            TermKind::Fuzzy { term, distance } => (term, distance),
            other => panic!("expected Fuzzy, got {other:?}"),
        }
    }

    /// Wraps a single field/comparator pair in a `SearchQuery::Condition`
    /// and translates it.
    fn condition_query(
        field: SearchField,
        comparator: SearchComparator,
        schema: &tantivy::schema::Schema,
    ) -> Box<dyn tantivy::query::Query> {
        let query = SearchQuery::Condition(SearchCondition { field, comparator });
        search_query_to_tantivy(&query, schema)
    }

    /// Parses `raw` as a MIME message, falling back to a plain-text message
    /// wrapping the raw text when parsing fails, and pairs it with a fresh id.
    fn make_mail_raw(raw: &str) -> (MessageId, Mail) {
        let message_id = MessageId::new();
        let message = match MimeMessage::parse_from_bytes(raw.as_bytes()) {
            Ok(parsed) => parsed,
            Err(_) => {
                let mut hdr = HeaderMap::new();
                hdr.insert("content-type", "text/plain");
                MimeMessage::new(hdr, MessageBody::Small(Bytes::from(raw.to_owned())))
            }
        };
        (message_id, Mail::new(None, vec![], message, None, None))
    }

    /// Creates an index in a temporary directory. The directory guard is
    /// returned so it outlives the index.
    fn make_search_index() -> (crate::TantivySearchIndex, tempfile::TempDir) {
        let dir = tempfile::TempDir::new().expect("temp dir");
        let idx = crate::TantivySearchIndex::new(dir.path()).expect("create index");
        (idx, dir)
    }

    /// Indexes one raw message, commits, and returns the id it was stored under.
    async fn index_one(idx: &crate::TantivySearchIndex, raw: &str) -> MessageId {
        let (mid, mail) = make_mail_raw(raw);
        idx.index_message(&mid, &mail).await.expect("index");
        idx.commit().await.expect("commit");
        mid
    }

    // ------------------------------------------------------------------
    // parse_search_term
    // ------------------------------------------------------------------

    #[test]
    fn test_parse_exact() {
        assert_eq!(expect_exact("hello"), "hello");
    }

    #[test]
    fn test_parse_exact_lowercases() {
        assert_eq!(expect_exact("Hello"), "hello");
    }

    #[test]
    fn test_parse_phrase() {
        assert_eq!(expect_phrase("\"hello world\""), vec!["hello", "world"]);
    }

    #[test]
    fn test_parse_phrase_lowercases() {
        assert_eq!(expect_phrase("\"Hello World\""), vec!["hello", "world"]);
    }

    #[test]
    fn test_parse_fuzzy() {
        let (term, distance) = expect_fuzzy("hello~2");
        assert_eq!(term, "hello");
        assert_eq!(distance, 2);
    }

    #[test]
    fn test_parse_fuzzy_lowercases() {
        let (term, distance) = expect_fuzzy("Hello~1");
        assert_eq!(term, "hello");
        assert_eq!(distance, 1);
    }

    #[test]
    fn test_parse_multiword_becomes_phrase() {
        match parse_search_term("hello world") {
            TermKind::Phrase(tokens) => {
                assert_eq!(tokens, vec!["hello", "world"]);
            }
            other => panic!("expected Phrase for multi-word, got {other:?}"),
        }
    }

    // ------------------------------------------------------------------
    // End-to-end queries against a real index
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_subject_query() {
        let (idx, _dir) = make_search_index();

        let raw = concat!(
            "From: sender@example.com\r\n",
            "To: recv@example.com\r\n",
            "Subject: Hello World\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Some body text.\r\n",
        );
        let mid = index_one(&idx, raw).await;

        let schema = idx.schema();
        let query = condition_query(
            SearchField::Subject,
            SearchComparator::Contains("Hello".to_string()),
            &schema,
        );

        let hits = idx.search_by_query(query, 10).expect("search");
        assert!(
            !hits.is_empty(),
            "subject query should return the indexed message"
        );
        assert_eq!(hits[0], *mid.as_uuid());
    }

    #[tokio::test]
    async fn test_date_range_query() {
        let (idx, _dir) = make_search_index();

        let raw_recent = concat!(
            "From: alice@example.com\r\n",
            "Date: Sun, 1 Jun 2025 00:00:00 +0000\r\n",
            "Subject: Recent\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Recent message.\r\n",
        );
        let raw_old = concat!(
            "From: bob@example.com\r\n",
            "Date: Mon, 1 Jan 2024 00:00:00 +0000\r\n",
            "Subject: Old\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Old message.\r\n",
        );

        let mid_recent = index_one(&idx, raw_recent).await;
        let _mid_old = index_one(&idx, raw_old).await;

        let schema = idx.schema();

        // Cut-off timestamp separating the two fixture messages.
        let ts_2025: i64 = 1_735_689_600;

        // The field is irrelevant for date comparators.
        let since = condition_query(
            SearchField::Subject,
            SearchComparator::DateSince(ts_2025),
            &schema,
        );
        let since_hits = idx.search_by_query(since, 10).expect("search");
        assert!(
            !since_hits.is_empty(),
            "DateSince should match at least one message"
        );
        assert!(
            since_hits.contains(mid_recent.as_uuid()),
            "DateSince should include the 2025 message"
        );

        let before = condition_query(
            SearchField::Subject,
            SearchComparator::DateBefore(ts_2025),
            &schema,
        );
        let before_hits = idx.search_by_query(before, 10).expect("search before");
        assert!(
            !before_hits.contains(mid_recent.as_uuid()),
            "DateBefore should exclude the 2025 message"
        );
    }

    #[tokio::test]
    async fn test_full_text_query() {
        let (idx, _dir) = make_search_index();

        let raw_subject = concat!(
            "From: alice@example.com\r\n",
            "Subject: Quarterly Report\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "See attached.\r\n",
        );
        let raw_body = concat!(
            "From: bob@example.com\r\n",
            "Subject: Meeting notes\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Quarterly budget review.\r\n",
        );

        let in_subject = index_one(&idx, raw_subject).await;
        let in_body = index_one(&idx, raw_body).await;

        let schema = idx.schema();
        let filter = JmapSearchFilter {
            text: Some("quarterly".to_string()),
            ..Default::default()
        };
        let hits = idx
            .search_by_query(jmap_filter_to_tantivy(&filter, &schema), 10)
            .expect("search");

        assert!(
            hits.contains(in_subject.as_uuid()),
            "full-text query should match subject field"
        );
        assert!(
            hits.contains(in_body.as_uuid()),
            "full-text query should match body field"
        );
    }

    #[tokio::test]
    async fn test_phrase_query() {
        let (idx, _dir) = make_search_index();

        let raw_match = concat!(
            "From: alice@example.com\r\n",
            "Subject: Hello World Test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The phrase hello world appears here.\r\n",
        );
        let raw_no_match = concat!(
            "From: alice@example.com\r\n",
            "Subject: World Hello Test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The words world and hello appear in reverse.\r\n",
        );

        let mid_match = index_one(&idx, raw_match).await;
        let mid_no_match = index_one(&idx, raw_no_match).await;

        let schema = idx.schema();
        let query = condition_query(
            SearchField::Body,
            SearchComparator::Contains("\"hello world\"".to_string()),
            &schema,
        );

        let hits = idx.search_by_query(query, 10).expect("search");
        assert!(
            hits.contains(mid_match.as_uuid()),
            "phrase query must match the message with adjacent 'hello world'"
        );
        assert!(
            !hits.contains(mid_no_match.as_uuid()),
            "phrase query must NOT match 'world hello' (reversed order)"
        );
    }

    #[tokio::test]
    async fn test_fuzzy_query() {
        let (idx, _dir) = make_search_index();

        let raw = concat!(
            "From: alice@example.com\r\n",
            "Subject: Typo test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The word helo is misspelled.\r\n",
        );
        let mid = index_one(&idx, raw).await;

        let schema = idx.schema();
        let query = condition_query(
            SearchField::Body,
            SearchComparator::Contains("hello~1".to_string()),
            &schema,
        );

        let hits = idx.search_by_query(query, 10).expect("search");
        assert!(
            hits.contains(mid.as_uuid()),
            "fuzzy query hello~1 should match 'helo'"
        );
    }

    #[tokio::test]
    async fn test_boolean_and() {
        let (idx, _dir) = make_search_index();

        let raw_both = concat!(
            "From: alice@example.com\r\n",
            "Subject: Budget Review\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Quarterly budget review.\r\n",
        );
        let raw_subject_only = concat!(
            "From: zach@example.com\r\n",
            "Subject: Budget Review\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Different content.\r\n",
        );

        let mid_both = index_one(&idx, raw_both).await;
        let mid_subject_only = index_one(&idx, raw_subject_only).await;

        let schema = idx.schema();
        let subject_has_budget = SearchQuery::Condition(SearchCondition {
            field: SearchField::Subject,
            comparator: SearchComparator::Contains("budget".to_string()),
        });
        let from_has_alice = SearchQuery::Condition(SearchCondition {
            field: SearchField::From,
            comparator: SearchComparator::Contains("alice".to_string()),
        });
        let query = search_query_to_tantivy(
            &SearchQuery::And(vec![subject_has_budget, from_has_alice]),
            &schema,
        );

        let hits = idx.search_by_query(query, 10).expect("search");
        assert!(
            hits.contains(mid_both.as_uuid()),
            "AND query should match the message with both 'budget' in subject and 'alice' in from"
        );
        assert!(
            !hits.contains(mid_subject_only.as_uuid()),
            "AND query should NOT match message where from is 'zach', not 'alice'"
        );
    }

    // ------------------------------------------------------------------
    // Smoke tests on the stand-alone schema
    // ------------------------------------------------------------------

    #[test]
    fn test_translate_condition_date_since_does_not_panic() {
        let schema = make_schema();
        let cond = SearchCondition {
            field: SearchField::Subject,
            comparator: SearchComparator::DateSince(1_735_689_600),
        };
        // Only checks that translation completes.
        let _q = translate_condition(&cond, &schema);
    }

    #[test]
    fn test_jmap_filter_empty_returns_allquery_type() {
        let schema = make_schema();
        let filter = JmapSearchFilter::default();
        let q = jmap_filter_to_tantivy(&filter, &schema);
        // Only checks that a usable query object came back.
        let _ = q.box_clone();
    }
}