Skip to main content

rusmes_search/
query_translator.rs

1//! Query translation layer: protocol search filters → Tantivy queries
2//!
3//! This module provides two main translation paths:
4//!
5//! 1. [`search_query_to_tantivy`] — converts the protocol-agnostic [`SearchQuery`]
6//!    intermediary type into a Tantivy [`Query`] ready to be executed against the index.
7//!    IMAP / POP3 / other protocol handlers convert their native filter structs into
8//!    [`SearchQuery`] first, then call this function.
9//!
10//! 2. [`jmap_filter_to_tantivy`] — converts a [`JmapSearchFilter`] (reflecting
11//!    JMAP RFC 8621 `EmailFilterCondition` fields) directly into a Tantivy query.
12//!
13//! Both translators handle:
14//! - Per-field term searches with tokenizer-aware lowercasing
15//! - Full-text search across multiple fields (OR union)
16//! - Date-range queries on the i64 `date` field
17//! - Boolean AND / OR / NOT composition
18//! - Phrase detection (`"hello world"` in double-quotes) → `PhraseQuery`
19//! - Fuzzy term matching (`hello~2`) → `FuzzyTermQuery`
20//!
21//! # Tokenization note
22//!
23//! Tantivy's default `TEXT` tokenizer lowercases all tokens at index time.
24//! Every search term fed into a `TermQuery`, `PhraseQuery`, or `FuzzyTermQuery`
25//! on a `TEXT` field **must** therefore be lowercased before the `Term` is
26//! constructed, or the query will silently match nothing.
27
28use std::ops::Bound;
29
30use tantivy::{
31    query::{
32        AllQuery, BooleanQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery,
33    },
34    schema::{IndexRecordOption, Schema},
35    Term,
36};
37
38// ─── Protocol-agnostic intermediary types ────────────────────────────────────
39
/// The index field (or group of fields) that a search condition targets.
///
/// Text variants are mapped to a Tantivy field name by `field_name_for`;
/// [`SearchField::FullText`] is instead expanded by the translator into a
/// union over several fields.
#[derive(Debug, Clone)]
pub enum SearchField {
    /// The `subject` index field.
    Subject,
    /// The `from` index field.
    From,
    /// The `to` index field.
    To,
    /// The `body` index field.
    Body,
    /// An arbitrary header, searched through the shared `header_values`
    /// index field (Cc, Bcc, Reply-To, etc.).
    ///
    /// NOTE(review): the carried header name is ignored when this variant is
    /// mapped to an index field name, so the search is not restricted to
    /// that particular header — confirm this is intended.
    Header(String),
    /// `Cc` shorthand; searched through the shared `header_values` field.
    Cc,
    /// `Bcc` shorthand; searched through the shared `header_values` field.
    Bcc,
    /// Full-text: searches `subject`, `body`, and `header_values` simultaneously.
    FullText,
    /// The `attachment_filenames` index field.
    AttachmentFilenames,
}
62
/// The comparison / match style for a search condition.
///
/// The text comparators ([`Contains`](Self::Contains) and
/// [`Equals`](Self::Equals)) are currently translated identically. The date
/// comparators always target the `date` index field, regardless of the
/// condition's [`SearchField`]. Timestamps are Unix timestamps.
#[derive(Debug, Clone)]
pub enum SearchComparator {
    /// Match any message where the field contains the given string.
    Contains(String),
    /// Exact equality match (lowercased single token).
    Equals(String),
    /// Date ≥ Unix timestamp (inclusive lower bound).
    DateSince(i64),
    /// Date < Unix timestamp (exclusive upper bound).
    DateBefore(i64),
    /// Date is on a specific day: the half-open range `[ts, ts + 86400)`.
    DateOn(i64),
}
77
/// A single field+comparator condition: the leaf node of a [`SearchQuery`] tree.
#[derive(Debug, Clone)]
pub struct SearchCondition {
    /// Which index field (or field group) the condition applies to.
    pub field: SearchField,
    /// How the field is compared, together with the value to compare against.
    pub comparator: SearchComparator,
}
84
/// Protocol-agnostic search query tree.
///
/// Callers (IMAP handler, POP3 handler, …) convert their native filter types
/// into this enum and pass it to [`search_query_to_tantivy`]. The tree is
/// recursive: `And`, `Or` and `Not` wrap further `SearchQuery` nodes.
#[derive(Debug, Clone)]
pub enum SearchQuery {
    /// A single field condition.
    Condition(SearchCondition),
    /// All sub-queries must match.
    And(Vec<SearchQuery>),
    /// At least one sub-query must match.
    Or(Vec<SearchQuery>),
    /// The sub-query must NOT match (combined with an `AllQuery` positive clause).
    Not(Box<SearchQuery>),
    /// Match every document.
    All,
    /// Match no document.
    None,
}
104
105// ─── JMAP intermediary ────────────────────────────────────────────────────────
106
/// Simplified JMAP-derived filter for Tantivy translation.
///
/// Mirrors the fields of `rusmes_jmap::types::EmailFilterCondition` that are
/// searchable via Tantivy. The JMAP handler constructs this from the richer
/// JMAP type.
///
/// Every field is optional; [`jmap_filter_to_tantivy`] turns each populated,
/// non-empty field into a required clause and ANDs them together. The
/// `Default` value (all `None`) therefore describes "no restriction".
#[derive(Debug, Clone, Default)]
pub struct JmapSearchFilter {
    /// RFC 8621 `text` — full-text search across `subject`, `body` and
    /// `header_values`.
    pub text: Option<String>,
    /// RFC 8621 `from` — searched in the `from` index field.
    pub from: Option<String>,
    /// RFC 8621 `to` — searched in the `to` index field.
    pub to: Option<String>,
    /// RFC 8621 `cc` — searched in the shared `header_values` index field.
    pub cc: Option<String>,
    /// RFC 8621 `bcc` — searched in the shared `header_values` index field.
    pub bcc: Option<String>,
    /// RFC 8621 `subject` — searched in the `subject` index field.
    pub subject: Option<String>,
    /// RFC 8621 `body` — searched in the `body` index field.
    pub body: Option<String>,
    /// RFC 8621 `before` — Unix timestamp exclusive upper bound.
    pub before: Option<i64>,
    /// RFC 8621 `after` — Unix timestamp inclusive lower bound.
    pub after: Option<i64>,
}
133
134// ─── Term kind detection ─────────────────────────────────────────────────────
135
/// Internal classification for how a raw search string should be interpreted.
///
/// Values produced by [`parse_search_term`] only ever contain normalised
/// tokens: split on non-alphanumeric characters and lowercased, i.e. the same
/// shape the index's default `TEXT` tokenizer produces.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TermKind {
    /// A single token (already lowercased). May be empty when the input
    /// contained nothing searchable.
    Exact(String),
    /// A phrase: an ordered sequence of tokens (already lowercased) that must
    /// appear adjacent to each other.
    Phrase(Vec<String>),
    /// A fuzzy match with edit distance `distance`.
    Fuzzy {
        /// The term to match (already lowercased).
        term: String,
        /// Maximum edit distance (Levenshtein), never larger than 2 when
        /// produced by [`parse_search_term`].
        distance: u8,
    },
}

/// Largest edit distance Tantivy's `FuzzyTermQuery` can execute; larger values
/// are rejected when the query is run, so requests are capped to this.
const MAX_FUZZY_DISTANCE: u8 = 2;

/// Split `text` the way the index's default tokenizer does: every run of
/// alphanumeric characters is one token, everything else is a separator, and
/// tokens are lowercased.
fn tokenize_like_index(text: &str) -> Vec<String> {
    text.split(|c: char| !c.is_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(str::to_lowercase)
        .collect()
}

/// Parse a raw search string into a [`TermKind`].
///
/// # Rules
/// - If the string starts and ends with `"`, the inner text is tokenised and
///   returned as a [`TermKind::Phrase`]. A quoted string without any
///   searchable token yields an empty [`TermKind::Exact`].
/// - If the string ends with `~N` where N is a single decimal digit and the
///   part before `~` is exactly one token, it is returned as
///   [`TermKind::Fuzzy`]. The distance is capped at 2, the largest value
///   Tantivy can execute.
/// - Otherwise the string is tokenised: several tokens become a
///   [`TermKind::Phrase`], a single token (or nothing) a [`TermKind::Exact`].
///
/// Tokenisation splits on every non-alphanumeric character and lowercases the
/// pieces so that the terms line up with what the Tantivy `TEXT` tokenizer
/// stored at index time. For example `alice@example.com` becomes the phrase
/// `["alice", "example", "com"]` instead of a single term that could never
/// match.
pub fn parse_search_term(s: &str) -> TermKind {
    let trimmed = s.trim();

    // Explicit phrase: `"..."`. A lone `"` is not a phrase because the prefix
    // and suffix quote must be two distinct characters.
    if let Some(inner) = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    {
        let tokens = tokenize_like_index(inner);
        return if tokens.is_empty() {
            // Nothing searchable between the quotes.
            TermKind::Exact(String::new())
        } else {
            TermKind::Phrase(tokens)
        };
    }

    // Fuzzy: `<single token>~<digit>`.
    if let Some((base, suffix)) = trimmed.rsplit_once('~') {
        if let [digit @ b'0'..=b'9'] = suffix.as_bytes() {
            let mut tokens = tokenize_like_index(base);
            if tokens.len() == 1 {
                if let Some(term) = tokens.pop() {
                    return TermKind::Fuzzy {
                        term,
                        distance: (digit - b'0').min(MAX_FUZZY_DISTANCE),
                    };
                }
            }
        }
    }

    // Default: several tokens without quotes are still treated as a phrase.
    let mut tokens = tokenize_like_index(trimmed);
    if tokens.len() > 1 {
        return TermKind::Phrase(tokens);
    }
    TermKind::Exact(tokens.pop().unwrap_or_default())
}
205
206// ─── Tantivy query building helpers ──────────────────────────────────────────
207
208/// Resolve a field name in `schema` and return the field handle.
209///
210/// Returns `None` if the field does not exist in the schema (should never
211/// happen for well-formed schemas built by `TantivySearchIndex::build_schema`).
212fn resolve_field(schema: &Schema, name: &str) -> Option<tantivy::schema::Field> {
213    schema.get_field(name).ok()
214}
215
216/// Build a single-term or phrase query on a text field for the given search string.
217///
218/// Parses `value` through [`parse_search_term`] and constructs the appropriate
219/// Tantivy query variant. Returns `None` if the field is not found in the
220/// schema or the term list is empty.
221fn build_text_query(schema: &Schema, field_name: &str, value: &str) -> Option<Box<dyn Query>> {
222    let field = resolve_field(schema, field_name)?;
223    match parse_search_term(value) {
224        TermKind::Exact(word) if !word.is_empty() => {
225            let term = Term::from_field_text(field, &word);
226            Some(Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)))
227        }
228        TermKind::Phrase(tokens) if !tokens.is_empty() => {
229            if tokens.len() == 1 {
230                let term = Term::from_field_text(field, &tokens[0]);
231                Some(Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)))
232            } else {
233                let terms: Vec<Term> = tokens
234                    .iter()
235                    .map(|t| Term::from_field_text(field, t))
236                    .collect();
237                Some(Box::new(PhraseQuery::new(terms)))
238            }
239        }
240        TermKind::Fuzzy { term, distance } if !term.is_empty() => {
241            let t = Term::from_field_text(field, &term);
242            Some(Box::new(FuzzyTermQuery::new(t, distance, true)))
243        }
244        _ => None,
245    }
246}
247
248/// Build a full-text query (OR union) across `subject`, `body`, and
249/// `header_values` fields for the given search string.
250fn build_fulltext_query(schema: &Schema, value: &str) -> Box<dyn Query> {
251    let field_names = ["subject", "body", "header_values"];
252    let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
253
254    for name in &field_names {
255        if let Some(q) = build_text_query(schema, name, value) {
256            clauses.push((Occur::Should, q));
257        }
258    }
259
260    if clauses.is_empty() {
261        // Fallback: match everything if no field could be found.
262        Box::new(AllQuery)
263    } else {
264        Box::new(BooleanQuery::union_with_minimum_required_clauses(
265            clauses
266                .into_iter()
267                .map(|(_, q)| q)
268                .collect::<Vec<Box<dyn Query>>>(),
269            1,
270        ))
271    }
272}
273
274/// Map a [`SearchField`] to the underlying Tantivy field name.
275fn field_name_for(field: &SearchField) -> &'static str {
276    match field {
277        SearchField::Subject => "subject",
278        SearchField::From => "from",
279        SearchField::To => "to",
280        SearchField::Body => "body",
281        SearchField::Header(_) | SearchField::Cc | SearchField::Bcc => "header_values",
282        SearchField::FullText => "body", // handled separately in the caller
283        SearchField::AttachmentFilenames => "attachment_filenames",
284    }
285}
286
287// ─── Primary translation entry point ─────────────────────────────────────────
288
289/// Translate a [`SearchQuery`] into a Tantivy [`Query`] using the given schema.
290///
291/// This is the main entry point for IMAP (and other protocol) search handlers.
292/// The returned `Box<dyn Query>` can be passed directly to
293/// `Searcher::search(&query, &TopDocs::…)`.
294///
295/// # Invariants
296///
297/// - [`SearchQuery::All`] → [`AllQuery`] (matches every document).
298/// - [`SearchQuery::None`] → an empty MUST-NOT boolean (matches nothing).
299/// - [`SearchQuery::Not`] wraps its inner in a `MustNot` + `Must: AllQuery`
300///   pair, because Tantivy requires at least one positive clause.
301pub fn search_query_to_tantivy(query: &SearchQuery, schema: &Schema) -> Box<dyn Query> {
302    match query {
303        SearchQuery::All => Box::new(AllQuery),
304
305        SearchQuery::None => {
306            // Matches nothing: use an impossible TermQuery on a sentinel value
307            // that can never appear in real data (null-byte prefix).
308            if let Some(f) = resolve_field(schema, "message_id") {
309                let t = Term::from_field_text(f, "\x00__none__\x00");
310                Box::new(TermQuery::new(t, IndexRecordOption::Basic)) as Box<dyn Query>
311            } else {
312                Box::new(AllQuery) as Box<dyn Query>
313            }
314        }
315
316        SearchQuery::Condition(cond) => translate_condition(cond, schema),
317
318        SearchQuery::And(sub) => {
319            if sub.is_empty() {
320                return Box::new(AllQuery);
321            }
322            let clauses: Vec<(Occur, Box<dyn Query>)> = sub
323                .iter()
324                .map(|q| (Occur::Must, search_query_to_tantivy(q, schema)))
325                .collect();
326            Box::new(BooleanQuery::new(clauses))
327        }
328
329        SearchQuery::Or(sub) => {
330            if sub.is_empty() {
331                return Box::new(AllQuery);
332            }
333            let sub_queries: Vec<Box<dyn Query>> = sub
334                .iter()
335                .map(|q| search_query_to_tantivy(q, schema))
336                .collect();
337            Box::new(BooleanQuery::union_with_minimum_required_clauses(
338                sub_queries,
339                1,
340            ))
341        }
342
343        SearchQuery::Not(inner) => {
344            // Tantivy requires at least one positive clause in a BooleanQuery.
345            // We add Must(AllQuery) so the MustNot clause is effective.
346            let positive: Box<dyn Query> = Box::new(AllQuery);
347            let negative = search_query_to_tantivy(inner, schema);
348            Box::new(BooleanQuery::new(vec![
349                (Occur::Must, positive),
350                (Occur::MustNot, negative),
351            ]))
352        }
353    }
354}
355
356/// Translate a single [`SearchCondition`] into a Tantivy query.
357fn translate_condition(cond: &SearchCondition, schema: &Schema) -> Box<dyn Query> {
358    match &cond.comparator {
359        SearchComparator::Contains(value) | SearchComparator::Equals(value) => match &cond.field {
360            SearchField::FullText => build_fulltext_query(schema, value),
361            other => {
362                let name = field_name_for(other);
363                build_text_query(schema, name, value).unwrap_or_else(|| Box::new(AllQuery))
364            }
365        },
366
367        SearchComparator::DateSince(ts) => {
368            if let Some(date_field) = resolve_field(schema, "date") {
369                let lower = Term::from_field_i64(date_field, *ts);
370                Box::new(RangeQuery::new(Bound::Included(lower), Bound::Unbounded))
371            } else {
372                Box::new(AllQuery)
373            }
374        }
375
376        SearchComparator::DateBefore(ts) => {
377            if let Some(date_field) = resolve_field(schema, "date") {
378                let upper = Term::from_field_i64(date_field, *ts);
379                Box::new(RangeQuery::new(Bound::Unbounded, Bound::Excluded(upper)))
380            } else {
381                Box::new(AllQuery)
382            }
383        }
384
385        SearchComparator::DateOn(ts) => {
386            // Match messages where `ts <= date < ts + 86400` (one full day).
387            if let Some(date_field) = resolve_field(schema, "date") {
388                let lower = Term::from_field_i64(date_field, *ts);
389                let upper = Term::from_field_i64(date_field, ts + 86_400);
390                Box::new(RangeQuery::new(
391                    Bound::Included(lower),
392                    Bound::Excluded(upper),
393                ))
394            } else {
395                Box::new(AllQuery)
396            }
397        }
398    }
399}
400
401// ─── JMAP translation ─────────────────────────────────────────────────────────
402
403/// Translate a [`JmapSearchFilter`] into a Tantivy [`Query`].
404///
405/// Each populated field in the filter produces a MUST clause; all clauses are
406/// ANDed together. An empty filter (all `None`) returns [`AllQuery`].
407///
408/// The `text` field (RFC 8621 full-text) produces a SHOULD union across
409/// `subject`, `body`, and `header_values`.
410pub fn jmap_filter_to_tantivy(filter: &JmapSearchFilter, schema: &Schema) -> Box<dyn Query> {
411    let mut clauses: Vec<(Occur, Box<dyn Query>)> = Vec::new();
412
413    // Full-text search across subject, body, header_values.
414    if let Some(text) = &filter.text {
415        if !text.is_empty() {
416            clauses.push((Occur::Must, build_fulltext_query(schema, text)));
417        }
418    }
419
420    // Per-field text conditions.
421    let field_map: &[(&Option<String>, &str)] = &[
422        (&filter.from, "from"),
423        (&filter.to, "to"),
424        (&filter.subject, "subject"),
425        (&filter.body, "body"),
426    ];
427
428    for (opt, field_name) in field_map {
429        if let Some(val) = opt {
430            if !val.is_empty() {
431                if let Some(q) = build_text_query(schema, field_name, val) {
432                    clauses.push((Occur::Must, q));
433                }
434            }
435        }
436    }
437
438    // Cc and Bcc map to header_values.
439    for val in [&filter.cc, &filter.bcc].into_iter().flatten() {
440        if !val.is_empty() {
441            if let Some(q) = build_text_query(schema, "header_values", val) {
442                clauses.push((Occur::Must, q));
443            }
444        }
445    }
446
447    // Date range (after = inclusive lower bound, before = exclusive upper bound).
448    if let (Some(after), Some(before)) = (filter.after, filter.before) {
449        if let Some(date_field) = resolve_field(schema, "date") {
450            let lower = Term::from_field_i64(date_field, after);
451            let upper = Term::from_field_i64(date_field, before);
452            let range: Box<dyn Query> = Box::new(RangeQuery::new(
453                Bound::Included(lower),
454                Bound::Excluded(upper),
455            ));
456            clauses.push((Occur::Must, range));
457        }
458    } else if let Some(after) = filter.after {
459        if let Some(date_field) = resolve_field(schema, "date") {
460            let lower = Term::from_field_i64(date_field, after);
461            let range: Box<dyn Query> =
462                Box::new(RangeQuery::new(Bound::Included(lower), Bound::Unbounded));
463            clauses.push((Occur::Must, range));
464        }
465    } else if let Some(before) = filter.before {
466        if let Some(date_field) = resolve_field(schema, "date") {
467            let upper = Term::from_field_i64(date_field, before);
468            let range: Box<dyn Query> =
469                Box::new(RangeQuery::new(Bound::Unbounded, Bound::Excluded(upper)));
470            clauses.push((Occur::Must, range));
471        }
472    }
473
474    if clauses.is_empty() {
475        Box::new(AllQuery)
476    } else {
477        Box::new(BooleanQuery::new(clauses))
478    }
479}
480
481// ─── Tests ────────────────────────────────────────────────────────────────────
482
#[cfg(test)]
mod tests {
    use super::*;
    use crate::SearchIndex;
    use bytes::Bytes;
    use rusmes_proto::mail::Mail;
    use rusmes_proto::message::{HeaderMap, MessageBody, MessageId, MimeMessage};

    // ── Helpers ───────────────────────────────────────────────────────────────

    /// Build a minimal schema identical to the one used by `TantivySearchIndex`.
    /// Used by unit tests that only need the schema (no actual index).
    fn make_schema() -> tantivy::schema::Schema {
        use tantivy::schema::{NumericOptions, STORED, TEXT};
        let mut b = tantivy::schema::SchemaBuilder::default();
        b.add_text_field("message_id", STORED);
        b.add_text_field("from", TEXT | STORED);
        b.add_text_field("to", TEXT | STORED);
        b.add_text_field("subject", TEXT | STORED);
        b.add_text_field("body", TEXT);
        b.add_text_field("attachment_filenames", TEXT | STORED);
        b.add_text_field("header_values", TEXT);
        b.add_i64_field("date", NumericOptions::default().set_indexed().set_stored());
        b.build()
    }

    // ─── parse_search_term ────────────────────────────────────────────────────

    #[test]
    fn test_parse_exact() {
        match parse_search_term("hello") {
            TermKind::Exact(s) => assert_eq!(s, "hello"),
            other => panic!("expected Exact, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_exact_lowercases() {
        match parse_search_term("Hello") {
            TermKind::Exact(s) => assert_eq!(s, "hello"),
            other => panic!("expected Exact, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_phrase() {
        match parse_search_term("\"hello world\"") {
            TermKind::Phrase(tokens) => {
                assert_eq!(tokens, vec!["hello", "world"]);
            }
            other => panic!("expected Phrase, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_phrase_lowercases() {
        match parse_search_term("\"Hello World\"") {
            TermKind::Phrase(tokens) => {
                assert_eq!(tokens, vec!["hello", "world"]);
            }
            other => panic!("expected Phrase, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_fuzzy() {
        match parse_search_term("hello~2") {
            TermKind::Fuzzy { term, distance } => {
                assert_eq!(term, "hello");
                assert_eq!(distance, 2);
            }
            other => panic!("expected Fuzzy, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_fuzzy_lowercases() {
        match parse_search_term("Hello~1") {
            TermKind::Fuzzy { term, distance } => {
                assert_eq!(term, "hello");
                assert_eq!(distance, 1);
            }
            other => panic!("expected Fuzzy, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_multiword_becomes_phrase() {
        match parse_search_term("hello world") {
            TermKind::Phrase(tokens) => {
                assert_eq!(tokens, vec!["hello", "world"]);
            }
            other => panic!("expected Phrase for multi-word, got {other:?}"),
        }
    }

    // ─── Full-pipeline tests using TantivySearchIndex ────────────────────────

    /// Parse `raw` as a MIME message (falling back to a plain-text message
    /// when parsing fails) and wrap it in a `Mail` with a fresh message id.
    fn make_mail_raw(raw: &str) -> (MessageId, Mail) {
        let message_id = MessageId::new();
        let data = raw.as_bytes();
        let message = MimeMessage::parse_from_bytes(data).unwrap_or_else(|_| {
            let mut hdr = HeaderMap::new();
            hdr.insert("content-type", "text/plain");
            MimeMessage::new(hdr, MessageBody::Small(Bytes::from(raw.to_owned())))
        });
        let mail = Mail::new(None, vec![], message, None, None);
        (message_id, mail)
    }

    /// Create an index in a fresh temporary directory. The `TempDir` is
    /// returned so the caller keeps it alive for the duration of the test.
    fn make_search_index() -> (crate::TantivySearchIndex, tempfile::TempDir) {
        let dir = tempfile::TempDir::new().expect("temp dir");
        let idx = crate::TantivySearchIndex::new(dir.path()).expect("create index");
        (idx, dir)
    }

    /// Index a message into `idx`, commit, and return the message id.
    async fn index_one(idx: &crate::TantivySearchIndex, raw: &str) -> MessageId {
        let (mid, mail) = make_mail_raw(raw);
        idx.index_message(&mid, &mail).await.expect("index");
        idx.commit().await.expect("commit");
        mid
    }

    // ── test_subject_query ────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_subject_query() {
        let (idx, _dir) = make_search_index();

        // Note: Subject is "Hello World" — mixed-case, so translator must lowercase.
        let raw = concat!(
            "From: sender@example.com\r\n",
            "To: recv@example.com\r\n",
            "Subject: Hello World\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Some body text.\r\n",
        );
        let mid = index_one(&idx, raw).await;

        let schema = idx.schema();
        let query = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                field: SearchField::Subject,
                comparator: SearchComparator::Contains("Hello".to_string()),
            }),
            &schema,
        );

        let results = idx.search_by_query(query, 10).expect("search");
        assert!(
            !results.is_empty(),
            "subject query should return the indexed message"
        );
        assert_eq!(results[0], *mid.as_uuid());
    }

    // ── test_date_range_query ─────────────────────────────────────────────────

    #[tokio::test]
    async fn test_date_range_query() {
        let (idx, _dir) = make_search_index();

        // Two messages: one from 2025, one from 2024.
        // 2025-06-01T00:00:00Z = 1748736000
        // 2024-01-01T00:00:00Z = 1704067200
        let raw_recent = concat!(
            "From: alice@example.com\r\n",
            "Date: Sun, 1 Jun 2025 00:00:00 +0000\r\n",
            "Subject: Recent\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Recent message.\r\n",
        );
        let raw_old = concat!(
            "From: bob@example.com\r\n",
            "Date: Mon, 1 Jan 2024 00:00:00 +0000\r\n",
            "Subject: Old\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Old message.\r\n",
        );

        let mid_recent = index_one(&idx, raw_recent).await;
        let mid_old = index_one(&idx, raw_old).await;

        let schema = idx.schema();

        // DateSince 2025-01-01 (1735689600) should match only the recent message.
        let ts_2025: i64 = 1_735_689_600;
        let query = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                field: SearchField::Subject, // field doesn't matter for date query
                comparator: SearchComparator::DateSince(ts_2025),
            }),
            &schema,
        );
        let results = idx.search_by_query(query, 10).expect("search");
        assert!(
            !results.is_empty(),
            "DateSince should match at least one message"
        );
        assert!(
            results.contains(mid_recent.as_uuid()),
            "DateSince should include the 2025 message"
        );
        assert!(
            !results.contains(mid_old.as_uuid()),
            "DateSince should exclude the 2024 message"
        );

        // DateBefore 2025-01-01 should match only the old message.
        let query_before = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                field: SearchField::Subject,
                comparator: SearchComparator::DateBefore(ts_2025),
            }),
            &schema,
        );
        let results_before = idx
            .search_by_query(query_before, 10)
            .expect("search before");
        assert!(
            results_before.contains(mid_old.as_uuid()),
            "DateBefore should include the 2024 message"
        );
        assert!(
            !results_before.contains(mid_recent.as_uuid()),
            "DateBefore should exclude the 2025 message"
        );
    }

    // ── test_full_text_query ──────────────────────────────────────────────────

    #[tokio::test]
    async fn test_full_text_query() {
        let (idx, _dir) = make_search_index();

        // One message has the word in Subject, another in body.
        let raw_subject = concat!(
            "From: alice@example.com\r\n",
            "Subject: Quarterly Report\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "See attached.\r\n",
        );
        let raw_body = concat!(
            "From: bob@example.com\r\n",
            "Subject: Meeting notes\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Quarterly budget review.\r\n",
        );

        let mid1 = index_one(&idx, raw_subject).await;
        let mid2 = index_one(&idx, raw_body).await;

        let schema = idx.schema();
        let filter = JmapSearchFilter {
            text: Some("quarterly".to_string()),
            ..Default::default()
        };
        let query = jmap_filter_to_tantivy(&filter, &schema);
        let results = idx.search_by_query(query, 10).expect("search");

        assert!(
            results.contains(mid1.as_uuid()),
            "full-text query should match subject field"
        );
        assert!(
            results.contains(mid2.as_uuid()),
            "full-text query should match body field"
        );
    }

    // ── test_phrase_query ──────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_phrase_query() {
        let (idx, _dir) = make_search_index();

        // Two messages: one contains the exact phrase, one has the words reversed.
        let raw_match = concat!(
            "From: alice@example.com\r\n",
            "Subject: Hello World Test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The phrase hello world appears here.\r\n",
        );
        let raw_no_match = concat!(
            "From: alice@example.com\r\n",
            "Subject: World Hello Test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The words world and hello appear in reverse.\r\n",
        );

        let mid_match = index_one(&idx, raw_match).await;
        let mid_no_match = index_one(&idx, raw_no_match).await;

        let schema = idx.schema();
        // Phrase query: "hello world" (adjacent tokens, must match in order).
        let query = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                field: SearchField::Body,
                comparator: SearchComparator::Contains("\"hello world\"".to_string()),
            }),
            &schema,
        );

        let results = idx.search_by_query(query, 10).expect("search");
        assert!(
            results.contains(mid_match.as_uuid()),
            "phrase query must match the message with adjacent 'hello world'"
        );
        assert!(
            !results.contains(mid_no_match.as_uuid()),
            "phrase query must NOT match 'world hello' (reversed order)"
        );
    }

    // ── test_fuzzy_query ──────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_fuzzy_query() {
        let (idx, _dir) = make_search_index();

        let raw = concat!(
            "From: alice@example.com\r\n",
            "Subject: Typo test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The word helo is misspelled.\r\n",
        );
        let mid = index_one(&idx, raw).await;

        let schema = idx.schema();
        // Fuzzy: "hello~1" should match "helo" (1 edit distance).
        let query = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                field: SearchField::Body,
                comparator: SearchComparator::Contains("hello~1".to_string()),
            }),
            &schema,
        );

        let results = idx.search_by_query(query, 10).expect("search");
        assert!(
            results.contains(mid.as_uuid()),
            "fuzzy query hello~1 should match 'helo'"
        );
    }

    // ── test_boolean_and ──────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_boolean_and() {
        let (idx, _dir) = make_search_index();

        let raw_both = concat!(
            "From: alice@example.com\r\n",
            "Subject: Budget Review\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Quarterly budget review.\r\n",
        );
        let raw_subject_only = concat!(
            "From: zach@example.com\r\n",
            "Subject: Budget Review\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Different content.\r\n",
        );

        let mid_both = index_one(&idx, raw_both).await;
        let mid_subject_only = index_one(&idx, raw_subject_only).await;

        let schema = idx.schema();
        // AND: Subject contains "budget" AND From contains "alice".
        let query = search_query_to_tantivy(
            &SearchQuery::And(vec![
                SearchQuery::Condition(SearchCondition {
                    field: SearchField::Subject,
                    comparator: SearchComparator::Contains("budget".to_string()),
                }),
                SearchQuery::Condition(SearchCondition {
                    field: SearchField::From,
                    comparator: SearchComparator::Contains("alice".to_string()),
                }),
            ]),
            &schema,
        );

        let results = idx.search_by_query(query, 10).expect("search");
        assert!(
            results.contains(mid_both.as_uuid()),
            "AND query should match the message with both 'budget' in subject and 'alice' in from"
        );
        assert!(
            !results.contains(mid_subject_only.as_uuid()),
            "AND query should NOT match message where from is 'zach', not 'alice'"
        );
    }

    // ── Unit test for translate_condition helper ───────────────────────────────

    #[test]
    fn test_translate_condition_date_since_does_not_panic() {
        let schema = make_schema();
        let cond = SearchCondition {
            field: SearchField::Subject,
            comparator: SearchComparator::DateSince(1_735_689_600),
        };
        // Must not panic.
        let _q = translate_condition(&cond, &schema);
    }

    #[test]
    fn test_jmap_filter_empty_returns_allquery_type() {
        let schema = make_schema();
        let filter = JmapSearchFilter::default();
        // Empty filter should build without panic and logically match everything.
        let q = jmap_filter_to_tantivy(&filter, &schema);
        // We can't inspect the concrete type, but we can verify it doesn't panic.
        let _ = q.box_clone();
    }
}