rusmes-search 0.1.2

Full-text search for RusMES — Tantivy-backed search index with trait-based abstraction for indexing, querying, and deleting mail messages
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
//! Query translation layer: protocol search filters → Tantivy queries
//!
//! This module provides two main translation paths:
//!
//! 1. [`search_query_to_tantivy`] — converts the protocol-agnostic [`SearchQuery`]
//!    intermediary type into a Tantivy [`Query`] ready to be executed against the index.
//!    IMAP / POP3 / other protocol handlers convert their native filter structs into
//!    [`SearchQuery`] first, then call this function.
//!
//! 2. [`jmap_filter_to_tantivy`] — converts a [`JmapSearchFilter`] (reflecting
//!    JMAP RFC 8621 `EmailFilterCondition` fields) directly into a Tantivy query.
//!
//! Both translators handle:
//! - Per-field term searches with tokenizer-aware lowercasing
//! - Full-text search across multiple fields (OR union)
//! - Date-range queries on the i64 `date` field
//! - Phrase detection (`"hello world"` in double-quotes) → `PhraseQuery`
//! - Fuzzy term matching (`hello~2`) → `FuzzyTermQuery`
//!
//! Arbitrary boolean AND / OR / NOT composition is only available through
//! [`SearchQuery`]; a [`JmapSearchFilter`] always ANDs its populated fields.
//!
//! # Tokenization note
//!
//! Tantivy's default `TEXT` tokenizer lowercases all tokens at index time.
//! Every search term fed into a `TermQuery`, `PhraseQuery`, or `FuzzyTermQuery`
//! on a `TEXT` field **must** therefore be lowercased before the `Term` is
//! constructed, or the query will silently match nothing.

use std::ops::Bound;

use tantivy::{
    query::{
        AllQuery, BooleanQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, RangeQuery, TermQuery,
    },
    schema::{IndexRecordOption, Schema},
    Term,
};

// ─── Protocol-agnostic intermediary types ────────────────────────────────────

/// The index field (or group of fields) a search condition targets.
///
/// Values are comparable and hashable so callers can deduplicate conditions
/// or assert on them directly in tests.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SearchField {
    /// The `subject` index field.
    Subject,
    /// The `from` index field.
    From,
    /// The `to` index field.
    To,
    /// The `body` index field.
    Body,
    /// An arbitrary header (Cc, Bcc, Reply-To, etc.), searched through the
    /// shared `header_values` index field.
    ///
    /// The carried header name is not consulted when the index field is
    /// chosen: every `Header` condition is matched against `header_values`
    /// as a whole.
    Header(String),
    /// Shorthand for `header_values` targeting `Cc`.
    Cc,
    /// Shorthand for `header_values` targeting `Bcc`.
    Bcc,
    /// Full-text: searches `subject`, `body`, and `header_values` simultaneously.
    FullText,
    /// The `attachment_filenames` index field.
    AttachmentFilenames,
}

/// The comparison / match style for a search condition.
///
/// The text variants carry the raw user-supplied search string; the date
/// variants carry a Unix timestamp that is compared against the i64 `date`
/// index field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SearchComparator {
    /// Match any message where the field contains the given string.
    Contains(String),
    /// Exact equality match (lowercased single token).
    ///
    /// The translation layer in this module currently builds the same query
    /// for `Equals` as it does for [`SearchComparator::Contains`].
    Equals(String),
    /// Date ≥ Unix timestamp.
    DateSince(i64),
    /// Date < Unix timestamp.
    DateBefore(i64),
    /// Date is on a specific day (`[ts, ts + 86400)`).
    DateOn(i64),
}

/// A single field+comparator condition.
///
/// This is the leaf node of a [`SearchQuery`] tree: one target field paired
/// with one way of comparing against it.
#[derive(Debug, Clone)]
pub struct SearchCondition {
    /// Which index field(s) the text comparators are evaluated against.
    ///
    /// The date comparators are always evaluated against the `date` index
    /// field, whatever this is set to.
    pub field: SearchField,
    /// How the field is matched (text match or date bound).
    pub comparator: SearchComparator,
}

/// Protocol-agnostic search query tree.
///
/// Callers (IMAP handler, POP3 handler, …) convert their native filter types
/// into this enum and pass it to [`search_query_to_tantivy`].
#[derive(Debug, Clone)]
pub enum SearchQuery {
    /// A single field condition.
    Condition(SearchCondition),
    /// All sub-queries must match. An empty list is translated to a query
    /// that matches every document.
    And(Vec<SearchQuery>),
    /// At least one sub-query must match. An empty list is translated to a
    /// query that matches every document.
    Or(Vec<SearchQuery>),
    /// The sub-query must NOT match (combined with `AllQuery` positive clause).
    Not(Box<SearchQuery>),
    /// Match every document.
    All,
    /// Match no document.
    None,
}

// ─── JMAP intermediary ────────────────────────────────────────────────────────

/// Simplified JMAP-derived filter for Tantivy translation.
///
/// Mirrors the fields of `rusmes_jmap::types::EmailFilterCondition` that are
/// searchable via Tantivy. The JMAP handler constructs this from the richer
/// JMAP type.
///
/// Every field is optional; `JmapSearchFilter::default()` is the empty filter.
/// Filters can be compared for equality, which is convenient when testing the
/// JMAP → filter conversion.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct JmapSearchFilter {
    /// RFC 8621 `text` — full-text search across all searchable fields.
    pub text: Option<String>,
    /// RFC 8621 `from`.
    pub from: Option<String>,
    /// RFC 8621 `to`.
    pub to: Option<String>,
    /// RFC 8621 `cc`.
    pub cc: Option<String>,
    /// RFC 8621 `bcc`.
    pub bcc: Option<String>,
    /// RFC 8621 `subject`.
    pub subject: Option<String>,
    /// RFC 8621 `body`.
    pub body: Option<String>,
    /// RFC 8621 `before` — Unix timestamp exclusive upper bound.
    pub before: Option<i64>,
    /// RFC 8621 `after` — Unix timestamp inclusive lower bound.
    pub after: Option<i64>,
}

// ─── Term kind detection ─────────────────────────────────────────────────────

/// Internal classification for how a raw search string should be interpreted.
///
/// Produced by [`parse_search_term`]. Values can be compared with `==`, so
/// callers and tests can assert on a parse result directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TermKind {
    /// An exact single token (already lowercased by the caller).
    Exact(String),
    /// A phrase: the string was wrapped in double-quotes.
    /// The inner vector holds the individual tokens (already lowercased).
    Phrase(Vec<String>),
    /// A fuzzy match with edit distance `distance`.
    Fuzzy {
        /// The term to match (already lowercased).
        term: String,
        /// Maximum edit distance (Levenshtein).
        distance: u8,
    },
}

/// Classify a raw search string as an exact term, a phrase, or a fuzzy term.
///
/// Surrounding whitespace is ignored. The first rule that applies wins:
///
/// 1. **Quoted phrase** — the string begins and ends with `"` (two distinct
///    quote characters). The text between the quotes is split on whitespace
///    and returned as [`TermKind::Phrase`]. If nothing but whitespace sits
///    between the quotes, this rule does not apply and parsing continues.
/// 2. **Fuzzy** — the string ends in `~N`, where `N` is one decimal digit and
///    something precedes the `~`. Returned as [`TermKind::Fuzzy`] with edit
///    distance `N`.
/// 3. **Bare words** — more than one whitespace-separated word is returned as
///    a [`TermKind::Phrase`]; anything else (including the empty string) is
///    returned as [`TermKind::Exact`].
///
/// Every token / term in the result is lowercased so that it lines up with
/// what Tantivy's `TEXT` tokenizer stored at index time.
pub fn parse_search_term(s: &str) -> TermKind {
    let trimmed = s.trim();

    // Rule 1: "quoted phrase". Stripping the prefix before the suffix means a
    // lone `"` cannot satisfy both ends at once.
    let quoted_inner = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    if let Some(inner) = quoted_inner {
        let tokens: Vec<String> = inner.split_whitespace().map(str::to_lowercase).collect();
        if !tokens.is_empty() {
            return TermKind::Phrase(tokens);
        }
        // Nothing usable between the quotes — keep going with the other rules.
    }

    // Rule 2: `stem~N`. The suffix must be exactly one byte long, so only a
    // single ASCII digit parses successfully here.
    if let Some((stem, suffix)) = trimmed.rsplit_once('~') {
        if suffix.len() == 1 && !stem.is_empty() {
            if let Ok(distance) = suffix.parse::<u8>() {
                return TermKind::Fuzzy {
                    term: stem.to_lowercase(),
                    distance,
                };
            }
        }
    }

    // Rule 3: bare input. Several words behave like an unquoted phrase.
    let lowered = trimmed.to_lowercase();
    if lowered.split_whitespace().nth(1).is_some() {
        let tokens = lowered.split_whitespace().map(str::to_owned).collect();
        return TermKind::Phrase(tokens);
    }

    TermKind::Exact(lowered)
}

// ─── Tantivy query building helpers ──────────────────────────────────────────

/// Look up the field called `name` in `schema`.
///
/// Yields `Some(handle)` when the schema knows the field and `None` when the
/// lookup fails; the lookup error itself is discarded. Schemas built by
/// `TantivySearchIndex::build_schema` are expected to contain every field
/// this module asks for.
fn resolve_field(schema: &Schema, name: &str) -> Option<tantivy::schema::Field> {
    let lookup = schema.get_field(name);
    lookup.ok()
}

/// Build a term, phrase, or fuzzy query on a text field for the given search string.
///
/// `value` is classified by [`parse_search_term`] and mapped as follows:
///
/// - exact term, or a phrase of one token → `TermQuery`
/// - phrase of two or more tokens → `PhraseQuery`
/// - fuzzy term → `FuzzyTermQuery` (a transposition counts as one edit)
///
/// Fuzzy distances above 2 are clamped to 2. Tantivy only provides
/// Levenshtein automata for distances 0–2, so a larger value would build a
/// query that fails with an invalid-argument error when it is executed.
///
/// Returns `None` if `field_name` is not in the schema or the search string
/// contains no usable term.
fn build_text_query(schema: &Schema, field_name: &str, value: &str) -> Option<Box<dyn Query>> {
    /// Largest edit distance accepted by Tantivy's `FuzzyTermQuery`.
    const MAX_FUZZY_DISTANCE: u8 = 2;

    let field = resolve_field(schema, field_name)?;

    // Shared constructor for the two cases that end up as a plain term lookup.
    let single_term = |token: &str| -> Box<dyn Query> {
        let term = Term::from_field_text(field, token);
        Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs))
    };

    match parse_search_term(value) {
        TermKind::Exact(word) if !word.is_empty() => Some(single_term(&word)),
        TermKind::Phrase(tokens) => match tokens.as_slice() {
            [] => None,
            // A phrase query needs at least two terms; one token is a term lookup.
            [only] => Some(single_term(only)),
            many => {
                let terms: Vec<Term> = many
                    .iter()
                    .map(|t| Term::from_field_text(field, t))
                    .collect();
                Some(Box::new(PhraseQuery::new(terms)))
            }
        },
        TermKind::Fuzzy { term, distance } if !term.is_empty() => {
            let t = Term::from_field_text(field, &term);
            let distance = distance.min(MAX_FUZZY_DISTANCE);
            Some(Box::new(FuzzyTermQuery::new(t, distance, true)))
        }
        // Empty exact / fuzzy terms carry nothing to search for.
        TermKind::Exact(_) | TermKind::Fuzzy { .. } => None,
    }
}

/// Search `subject`, `body`, and `header_values` at once for `value`.
///
/// One per-field query is built for every field that yields one, and the
/// results are combined so that a document matches when at least one of them
/// matches. If no per-field query could be built at all (no field resolved,
/// or the search string had no usable term) the result matches every document.
fn build_fulltext_query(schema: &Schema, value: &str) -> Box<dyn Query> {
    let per_field: Vec<Box<dyn Query>> = ["subject", "body", "header_values"]
        .iter()
        .filter_map(|name| build_text_query(schema, name, value))
        .collect();

    if per_field.is_empty() {
        // Nothing to restrict on — fall back to matching everything.
        return Box::new(AllQuery);
    }

    // Union of the per-field queries; at least one of them has to match.
    Box::new(BooleanQuery::union_with_minimum_required_clauses(
        per_field, 1,
    ))
}

/// Return the name of the Tantivy index field that backs a [`SearchField`].
///
/// All header-style variants share the `header_values` field. `FullText`
/// spans several fields and is dispatched by the caller before this lookup;
/// the `"body"` returned for it here is only a placeholder.
fn field_name_for(field: &SearchField) -> &'static str {
    match field {
        // Fields with a dedicated index field of their own.
        SearchField::Subject => "subject",
        SearchField::From => "from",
        SearchField::To => "to",
        SearchField::Body => "body",
        SearchField::AttachmentFilenames => "attachment_filenames",

        // Every header-like variant is backed by the same combined field.
        SearchField::Header(_) => "header_values",
        SearchField::Cc => "header_values",
        SearchField::Bcc => "header_values",

        // Placeholder only; the caller handles full-text separately.
        SearchField::FullText => "body",
    }
}

// ─── Primary translation entry point ─────────────────────────────────────────

/// Translate a [`SearchQuery`] into a Tantivy [`Query`] using the given schema.
///
/// This is the main entry point for IMAP (and other protocol) search handlers.
/// The returned `Box<dyn Query>` can be passed directly to
/// `Searcher::search(&query, &TopDocs::…)`.
///
/// # Invariants
///
/// - [`SearchQuery::All`] → [`AllQuery`] (matches every document).
/// - [`SearchQuery::None`] → an empty [`BooleanQuery`] (matches nothing),
///   independent of which fields the schema contains or how they are indexed.
/// - [`SearchQuery::And`] / [`SearchQuery::Or`] with an empty list →
///   [`AllQuery`].
/// - [`SearchQuery::Not`] wraps its inner in a `MustNot` + `Must: AllQuery`
///   pair, because Tantivy requires at least one positive clause.
pub fn search_query_to_tantivy(query: &SearchQuery, schema: &Schema) -> Box<dyn Query> {
    match query {
        SearchQuery::All => Box::new(AllQuery),

        // A boolean query without clauses has nothing positive to score, so
        // it yields no documents. Unlike a sentinel term lookup this needs no
        // particular field to exist or be indexed, and it can never degrade
        // into "match everything".
        SearchQuery::None => Box::new(BooleanQuery::new(Vec::new())),

        SearchQuery::Condition(cond) => translate_condition(cond, schema),

        SearchQuery::And(sub) if sub.is_empty() => Box::new(AllQuery),
        SearchQuery::And(sub) => {
            let clauses: Vec<(Occur, Box<dyn Query>)> = sub
                .iter()
                .map(|q| (Occur::Must, search_query_to_tantivy(q, schema)))
                .collect();
            Box::new(BooleanQuery::new(clauses))
        }

        SearchQuery::Or(sub) if sub.is_empty() => Box::new(AllQuery),
        SearchQuery::Or(sub) => {
            let sub_queries: Vec<Box<dyn Query>> = sub
                .iter()
                .map(|q| search_query_to_tantivy(q, schema))
                .collect();
            Box::new(BooleanQuery::union_with_minimum_required_clauses(
                sub_queries,
                1,
            ))
        }

        SearchQuery::Not(inner) => {
            // Tantivy requires at least one positive clause in a BooleanQuery.
            // We add Must(AllQuery) so the MustNot clause is effective.
            let positive: Box<dyn Query> = Box::new(AllQuery);
            let negative = search_query_to_tantivy(inner, schema);
            Box::new(BooleanQuery::new(vec![
                (Occur::Must, positive),
                (Occur::MustNot, negative),
            ]))
        }
    }
}

/// Translate a single [`SearchCondition`] into a Tantivy query.
fn translate_condition(cond: &SearchCondition, schema: &Schema) -> Box<dyn Query> {
    match &cond.comparator {
        SearchComparator::Contains(value) | SearchComparator::Equals(value) => match &cond.field {
            SearchField::FullText => build_fulltext_query(schema, value),
            other => {
                let name = field_name_for(other);
                build_text_query(schema, name, value).unwrap_or_else(|| Box::new(AllQuery))
            }
        },

        SearchComparator::DateSince(ts) => {
            if let Some(date_field) = resolve_field(schema, "date") {
                let lower = Term::from_field_i64(date_field, *ts);
                Box::new(RangeQuery::new(Bound::Included(lower), Bound::Unbounded))
            } else {
                Box::new(AllQuery)
            }
        }

        SearchComparator::DateBefore(ts) => {
            if let Some(date_field) = resolve_field(schema, "date") {
                let upper = Term::from_field_i64(date_field, *ts);
                Box::new(RangeQuery::new(Bound::Unbounded, Bound::Excluded(upper)))
            } else {
                Box::new(AllQuery)
            }
        }

        SearchComparator::DateOn(ts) => {
            // Match messages where `ts <= date < ts + 86400` (one full day).
            if let Some(date_field) = resolve_field(schema, "date") {
                let lower = Term::from_field_i64(date_field, *ts);
                let upper = Term::from_field_i64(date_field, ts + 86_400);
                Box::new(RangeQuery::new(
                    Bound::Included(lower),
                    Bound::Excluded(upper),
                ))
            } else {
                Box::new(AllQuery)
            }
        }
    }
}

// ─── JMAP translation ─────────────────────────────────────────────────────────

/// Translate a [`JmapSearchFilter`] into a Tantivy [`Query`].
///
/// Every populated, non-empty field contributes one MUST clause and all
/// clauses are ANDed. A filter that contributes no clause at all yields
/// [`AllQuery`].
///
/// - `text` becomes a union across `subject`, `body`, and `header_values`.
/// - `from`, `to`, `subject`, `body` are matched against the index field of
///   the same name; `cc` and `bcc` are matched against `header_values`.
///   A text field for which no query can be built is skipped.
/// - `after` is an inclusive lower bound and `before` an exclusive upper
///   bound on the `date` field; either may be given alone. The date clause is
///   skipped when the schema has no `date` field.
pub fn jmap_filter_to_tantivy(filter: &JmapSearchFilter, schema: &Schema) -> Box<dyn Query> {
    let mut must: Vec<(Occur, Box<dyn Query>)> = Vec::new();

    // `text`: one clause that is itself a union over several fields.
    if let Some(text) = filter.text.as_deref().filter(|t| !t.is_empty()) {
        must.push((Occur::Must, build_fulltext_query(schema, text)));
    }

    // Single-field text conditions, paired with the index field they search.
    // Cc and Bcc have no field of their own and go through `header_values`.
    let per_field: [(Option<&str>, &str); 6] = [
        (filter.from.as_deref(), "from"),
        (filter.to.as_deref(), "to"),
        (filter.subject.as_deref(), "subject"),
        (filter.body.as_deref(), "body"),
        (filter.cc.as_deref(), "header_values"),
        (filter.bcc.as_deref(), "header_values"),
    ];
    must.extend(
        per_field
            .into_iter()
            .filter_map(|(value, field_name)| {
                let value = value.filter(|v| !v.is_empty())?;
                build_text_query(schema, field_name, value)
            })
            .map(|q| (Occur::Must, q)),
    );

    // Date window: [after, before), with a missing side left open.
    if filter.after.is_some() || filter.before.is_some() {
        if let Some(date_field) = resolve_field(schema, "date") {
            let lower = match filter.after {
                Some(ts) => Bound::Included(Term::from_field_i64(date_field, ts)),
                None => Bound::Unbounded,
            };
            let upper = match filter.before {
                Some(ts) => Bound::Excluded(Term::from_field_i64(date_field, ts)),
                None => Bound::Unbounded,
            };
            let range: Box<dyn Query> = Box::new(RangeQuery::new(lower, upper));
            must.push((Occur::Must, range));
        }
    }

    if must.is_empty() {
        return Box::new(AllQuery);
    }
    Box::new(BooleanQuery::new(must))
}

// ─── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use crate::SearchIndex;
    use bytes::Bytes;
    use rusmes_proto::mail::Mail;
    use rusmes_proto::message::{HeaderMap, MessageBody, MessageId, MimeMessage};

    // ── Helpers ───────────────────────────────────────────────────────────────

    /// Construct a stand-alone schema for unit tests that need field handles
    /// but no on-disk index.
    ///
    /// The field list is meant to match the schema of `TantivySearchIndex`;
    /// fields are registered in the same order as they are listed here.
    fn make_schema() -> tantivy::schema::Schema {
        use tantivy::schema::{NumericOptions, SchemaBuilder, STORED, TEXT};

        let date_options = NumericOptions::default().set_indexed().set_stored();

        let mut builder = SchemaBuilder::default();
        // The message id is only stored, never tokenised.
        builder.add_text_field("message_id", STORED);
        // Address and subject headers are both searchable and retrievable.
        for name in ["from", "to", "subject"] {
            builder.add_text_field(name, TEXT | STORED);
        }
        builder.add_text_field("body", TEXT);
        builder.add_text_field("attachment_filenames", TEXT | STORED);
        builder.add_text_field("header_values", TEXT);
        builder.add_i64_field("date", date_options);
        builder.build()
    }

    // ─── parse_search_term ────────────────────────────────────────────────────

    /// A single bare word parses as an exact term.
    #[test]
    fn test_parse_exact() {
        let parsed = parse_search_term("hello");
        match parsed {
            TermKind::Exact(word) => assert_eq!(word, "hello"),
            unexpected => panic!("expected Exact, got {unexpected:?}"),
        }
    }

    /// An exact term comes back lowercased regardless of input case.
    #[test]
    fn test_parse_exact_lowercases() {
        let parsed = parse_search_term("Hello");
        match parsed {
            TermKind::Exact(word) => assert_eq!(word, "hello"),
            unexpected => panic!("expected Exact, got {unexpected:?}"),
        }
    }

    /// A double-quoted input parses as a phrase made of its individual words.
    #[test]
    fn test_parse_phrase() {
        let parsed = parse_search_term("\"hello world\"");
        match parsed {
            TermKind::Phrase(words) => assert_eq!(words, vec!["hello", "world"]),
            unexpected => panic!("expected Phrase, got {unexpected:?}"),
        }
    }

    /// Every word of a quoted phrase comes back lowercased.
    #[test]
    fn test_parse_phrase_lowercases() {
        let parsed = parse_search_term("\"Hello World\"");
        match parsed {
            TermKind::Phrase(words) => assert_eq!(words, vec!["hello", "world"]),
            unexpected => panic!("expected Phrase, got {unexpected:?}"),
        }
    }

    /// `word~N` parses as a fuzzy term carrying the word and the distance `N`.
    #[test]
    fn test_parse_fuzzy() {
        let parsed = parse_search_term("hello~2");
        match parsed {
            TermKind::Fuzzy {
                term: word,
                distance: max_edits,
            } => {
                assert_eq!(word, "hello");
                assert_eq!(max_edits, 2);
            }
            unexpected => panic!("expected Fuzzy, got {unexpected:?}"),
        }
    }

    /// The word part of a fuzzy term comes back lowercased; the distance is kept.
    #[test]
    fn test_parse_fuzzy_lowercases() {
        let parsed = parse_search_term("Hello~1");
        match parsed {
            TermKind::Fuzzy {
                term: word,
                distance: max_edits,
            } => {
                assert_eq!(word, "hello");
                assert_eq!(max_edits, 1);
            }
            unexpected => panic!("expected Fuzzy, got {unexpected:?}"),
        }
    }

    /// Unquoted input containing several words is treated as a phrase too.
    #[test]
    fn test_parse_multiword_becomes_phrase() {
        let parsed = parse_search_term("hello world");
        match parsed {
            TermKind::Phrase(words) => assert_eq!(words, vec!["hello", "world"]),
            unexpected => panic!("expected Phrase for multi-word, got {unexpected:?}"),
        }
    }

    // ─── Full-pipeline tests using TantivySearchIndex ────────────────────────

    /// Turn raw message text into a fresh `(MessageId, Mail)` pair.
    ///
    /// The text is first parsed as a MIME message. If parsing fails, the helper
    /// falls back to a message with a single `content-type: text/plain` header
    /// whose body is the raw input, so callers always get a usable `Mail`.
    fn make_mail_raw(raw: &str) -> (MessageId, Mail) {
        let message_id = MessageId::new();

        let message = match MimeMessage::parse_from_bytes(raw.as_bytes()) {
            Ok(parsed) => parsed,
            Err(_) => {
                let mut headers = HeaderMap::new();
                headers.insert("content-type", "text/plain");
                let body = MessageBody::Small(Bytes::from(raw.to_owned()));
                MimeMessage::new(headers, body)
            }
        };

        (message_id, Mail::new(None, vec![], message, None, None))
    }

    /// Create a `TantivySearchIndex` rooted in a new temporary directory.
    ///
    /// The `TempDir` is returned alongside the index so the caller can keep it
    /// alive for as long as the index is in use.
    fn make_search_index() -> (crate::TantivySearchIndex, tempfile::TempDir) {
        let tmp = tempfile::TempDir::new().expect("temp dir");
        let index = crate::TantivySearchIndex::new(tmp.path()).expect("create index");
        (index, tmp)
    }

    /// Build a mail from `raw`, add it to `idx`, commit, and hand back the id
    /// it was indexed under.
    ///
    /// Panics if indexing or committing fails.
    async fn index_one(idx: &crate::TantivySearchIndex, raw: &str) -> MessageId {
        let (message_id, mail) = make_mail_raw(raw);
        idx.index_message(&message_id, &mail).await.expect("index");
        // Commit right away so the caller can search for the message.
        idx.commit().await.expect("commit");
        message_id
    }

    // ── test_subject_query ────────────────────────────────────────────────────

    /// A `Contains` condition on the subject finds the indexed message.
    #[tokio::test]
    async fn test_subject_query() {
        let (index, _tmp) = make_search_index();

        // Both the stored subject ("Hello World") and the search term ("Hello")
        // are mixed-case, so a hit requires case-insensitive matching.
        let message = concat!(
            "From: sender@example.com\r\n",
            "To: recv@example.com\r\n",
            "Subject: Hello World\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Some body text.\r\n",
        );
        let expected_id = index_one(&index, message).await;

        let schema = index.schema();
        let condition = SearchCondition {
            field: SearchField::Subject,
            comparator: SearchComparator::Contains("Hello".to_string()),
        };
        let query = search_query_to_tantivy(&SearchQuery::Condition(condition), &schema);

        let hits = index.search_by_query(query, 10).expect("search");
        assert!(
            !hits.is_empty(),
            "subject query should return the indexed message"
        );
        assert_eq!(hits[0], *expected_id.as_uuid());
    }

    // ── test_date_range_query ─────────────────────────────────────────────────

    /// `DateSince` and `DateBefore` must split two messages dated on either
    /// side of the 2025-01-01 cut-off.
    ///
    /// Both directions are checked for inclusion *and* exclusion, so a query
    /// that matches everything (or nothing) cannot pass.
    #[tokio::test]
    async fn test_date_range_query() {
        let (idx, _dir) = make_search_index();

        // Two messages: one from 2025, one from 2024.
        // 2025-06-01T00:00:00Z = 1748736000
        // 2024-01-01T00:00:00Z = 1704067200
        let raw_recent = concat!(
            "From: alice@example.com\r\n",
            "Date: Sun, 1 Jun 2025 00:00:00 +0000\r\n",
            "Subject: Recent\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Recent message.\r\n",
        );
        let raw_old = concat!(
            "From: bob@example.com\r\n",
            "Date: Mon, 1 Jan 2024 00:00:00 +0000\r\n",
            "Subject: Old\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Old message.\r\n",
        );

        let mid_recent = index_one(&idx, raw_recent).await;
        let mid_old = index_one(&idx, raw_old).await;

        let schema = idx.schema();

        // Cut-off between the two messages: 2025-01-01T00:00:00Z.
        let ts_2025: i64 = 1_735_689_600;

        // DateSince 2025-01-01 should match only the recent message.
        let query = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                // NOTE(review): the field is presumably ignored for date
                // comparators; Subject is just a placeholder here.
                field: SearchField::Subject,
                comparator: SearchComparator::DateSince(ts_2025),
            }),
            &schema,
        );
        let results = idx.search_by_query(query, 10).expect("search");
        assert!(
            !results.is_empty(),
            "DateSince should match at least one message"
        );
        assert!(
            results.contains(mid_recent.as_uuid()),
            "DateSince should include the 2025 message"
        );
        assert!(
            !results.contains(mid_old.as_uuid()),
            "DateSince should exclude the 2024 message"
        );

        // DateBefore 2025-01-01 should match only the old message.
        let query_before = search_query_to_tantivy(
            &SearchQuery::Condition(SearchCondition {
                field: SearchField::Subject,
                comparator: SearchComparator::DateBefore(ts_2025),
            }),
            &schema,
        );
        let results_before = idx
            .search_by_query(query_before, 10)
            .expect("search before");
        assert!(
            results_before.contains(mid_old.as_uuid()),
            "DateBefore should include the 2024 message"
        );
        assert!(
            !results_before.contains(mid_recent.as_uuid()),
            "DateBefore should exclude the 2025 message"
        );
    }

    // ── test_full_text_query ──────────────────────────────────────────────────

    #[tokio::test]
    async fn test_full_text_query() {
        let (idx, _dir) = make_search_index();

        // One message has the word in Subject, another in body.
        let raw_subject = concat!(
            "From: alice@example.com\r\n",
            "Subject: Quarterly Report\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "See attached.\r\n",
        );
        let raw_body = concat!(
            "From: bob@example.com\r\n",
            "Subject: Meeting notes\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Quarterly budget review.\r\n",
        );

        let mid1 = index_one(&idx, raw_subject).await;
        let mid2 = index_one(&idx, raw_body).await;

        let schema = idx.schema();
        let filter = JmapSearchFilter {
            text: Some("quarterly".to_string()),
            ..Default::default()
        };
        let query = jmap_filter_to_tantivy(&filter, &schema);
        let results = idx.search_by_query(query, 10).expect("search");

        assert!(
            results.contains(mid1.as_uuid()),
            "full-text query should match subject field"
        );
        assert!(
            results.contains(mid2.as_uuid()),
            "full-text query should match body field"
        );
    }

    // ── test_phrase_query ──────────────────────────────────────────────────────

    /// A quoted phrase matches only when its words appear adjacent and in order.
    #[tokio::test]
    async fn test_phrase_query() {
        let (index, _tmp) = make_search_index();

        // Same vocabulary in both messages; only the word order differs.
        let in_order = concat!(
            "From: alice@example.com\r\n",
            "Subject: Hello World Test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The phrase hello world appears here.\r\n",
        );
        let reversed = concat!(
            "From: alice@example.com\r\n",
            "Subject: World Hello Test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The words world and hello appear in reverse.\r\n",
        );

        let in_order_id = index_one(&index, in_order).await;
        let reversed_id = index_one(&index, reversed).await;

        let schema = index.schema();
        // The embedded double quotes are what request phrase semantics.
        let phrase = SearchCondition {
            field: SearchField::Body,
            comparator: SearchComparator::Contains("\"hello world\"".to_string()),
        };
        let query = search_query_to_tantivy(&SearchQuery::Condition(phrase), &schema);

        let hits = index.search_by_query(query, 10).expect("search");
        let found_in_order = hits.contains(in_order_id.as_uuid());
        let found_reversed = hits.contains(reversed_id.as_uuid());
        assert!(
            found_in_order,
            "phrase query must match the message with adjacent 'hello world'"
        );
        assert!(
            !found_reversed,
            "phrase query must NOT match 'world hello' (reversed order)"
        );
    }

    // ── test_fuzzy_query ──────────────────────────────────────────────────────

    /// A fuzzy term with distance 1 finds a word that is one edit away.
    #[tokio::test]
    async fn test_fuzzy_query() {
        let (index, _tmp) = make_search_index();

        // The body contains "helo", one deletion away from "hello".
        let message = concat!(
            "From: alice@example.com\r\n",
            "Subject: Typo test\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "The word helo is misspelled.\r\n",
        );
        let expected_id = index_one(&index, message).await;

        let schema = index.schema();
        let fuzzy = SearchCondition {
            field: SearchField::Body,
            comparator: SearchComparator::Contains("hello~1".to_string()),
        };
        let query = search_query_to_tantivy(&SearchQuery::Condition(fuzzy), &schema);

        let hits = index.search_by_query(query, 10).expect("search");
        assert!(
            hits.contains(expected_id.as_uuid()),
            "fuzzy query hello~1 should match 'helo'"
        );
    }

    // ── test_boolean_and ──────────────────────────────────────────────────────

    /// An `And` query only returns messages that satisfy every sub-condition.
    #[tokio::test]
    async fn test_boolean_and() {
        let (index, _tmp) = make_search_index();

        // Both messages share the subject; only the sender differs.
        let from_alice = concat!(
            "From: alice@example.com\r\n",
            "Subject: Budget Review\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Quarterly budget review.\r\n",
        );
        let from_zach = concat!(
            "From: zach@example.com\r\n",
            "Subject: Budget Review\r\n",
            "Content-Type: text/plain\r\n",
            "\r\n",
            "Different content.\r\n",
        );

        let alice_id = index_one(&index, from_alice).await;
        let zach_id = index_one(&index, from_zach).await;

        let schema = index.schema();

        // Small local shorthand for a `Contains` condition on one field.
        let contains = |field, needle: &str| {
            SearchQuery::Condition(SearchCondition {
                field,
                comparator: SearchComparator::Contains(needle.to_string()),
            })
        };
        // Subject contains "budget" AND From contains "alice".
        let conjunction = SearchQuery::And(vec![
            contains(SearchField::Subject, "budget"),
            contains(SearchField::From, "alice"),
        ]);
        let query = search_query_to_tantivy(&conjunction, &schema);

        let hits = index.search_by_query(query, 10).expect("search");
        assert!(
            hits.contains(alice_id.as_uuid()),
            "AND query should match the message with both 'budget' in subject and 'alice' in from"
        );
        assert!(
            !hits.contains(zach_id.as_uuid()),
            "AND query should NOT match message where from is 'zach', not 'alice'"
        );
    }

    // ── Unit test for translate_condition helper ───────────────────────────────

    /// Smoke test: translating a `DateSince` condition against the bare test
    /// schema completes without panicking.
    #[test]
    fn test_translate_condition_date_since_does_not_panic() {
        let date_since = SearchCondition {
            field: SearchField::Subject,
            comparator: SearchComparator::DateSince(1_735_689_600),
        };
        let schema = make_schema();
        // Reaching the end of the test is the whole assertion.
        let _translated = translate_condition(&date_since, &schema);
    }

    /// An empty JMAP filter must translate to a match-everything `AllQuery`.
    #[test]
    fn test_jmap_filter_empty_returns_allquery_type() {
        let schema = make_schema();
        let filter = JmapSearchFilter::default();
        let q = jmap_filter_to_tantivy(&filter, &schema);
        // `dyn Query` supports downcasting, so the concrete type behind the
        // box can be checked directly rather than only ruling out a panic.
        assert!(
            q.is::<tantivy::query::AllQuery>(),
            "empty filter should translate to AllQuery, got {q:?}"
        );
        // Cloning the boxed query must work as well.
        let _ = q.box_clone();
    }
}