Skip to main content

mxr_search/
lib.rs

1pub mod ast;
2mod index;
3pub mod parser;
4pub mod query_builder;
5mod saved;
6mod schema;
7
8pub use ast::*;
9pub use index::{SearchIndex, SearchPage, SearchResult};
10pub use parser::{parse_query, ParseError};
11pub use query_builder::QueryBuilder;
12pub use saved::SavedSearchService;
13pub use schema::MxrSchema;
14
#[cfg(test)]
mod tests {
    use super::*;
    use mxr_core::id::*;
    use mxr_core::types::*;

    /// Builds a read, attachment-free envelope sent from `test@example.com`.
    ///
    /// Thin wrapper over [`make_envelope_full`] for tests that only care about the
    /// subject, snippet and sender display name.
    fn make_envelope(subject: &str, snippet: &str, from_name: &str) -> Envelope {
        make_envelope_full(
            subject,
            snippet,
            from_name,
            "test@example.com",
            MessageFlags::READ,
            false,
        )
    }

    /// Builds a fully populated [`Envelope`] fixture.
    ///
    /// Each call mints fresh message, account and thread ids and stamps the envelope
    /// with the current UTC time. Recipients are fixed (`recipient@example.com`,
    /// `team@example.com` on cc, `hidden@example.com` on bcc), the size is 1000 bytes
    /// and the only label is `notifications`; the e2e tests below rely on these values.
    fn make_envelope_full(
        subject: &str,
        snippet: &str,
        from_name: &str,
        from_email: &str,
        flags: MessageFlags,
        has_attachments: bool,
    ) -> Envelope {
        Envelope {
            id: MessageId::new(),
            account_id: AccountId::new(),
            // NOTE(review): derived from the subject length only, so fixtures whose
            // subjects have equal length share a provider id. No test in this module
            // queries on it.
            provider_id: format!("fake-{}", subject.len()),
            thread_id: ThreadId::new(),
            message_id_header: None,
            in_reply_to: None,
            references: vec![],
            from: Address {
                name: Some(from_name.to_string()),
                email: from_email.to_string(),
            },
            to: vec![Address {
                name: None,
                email: "recipient@example.com".to_string(),
            }],
            cc: vec![Address {
                name: None,
                email: "team@example.com".to_string(),
            }],
            bcc: vec![Address {
                name: None,
                email: "hidden@example.com".to_string(),
            }],
            subject: subject.to_string(),
            date: chrono::Utc::now(),
            flags,
            snippet: snippet.to_string(),
            has_attachments,
            size_bytes: 1000,
            unsubscribe: UnsubscribeMethod::None,
            label_provider_ids: vec!["notifications".to_string()],
        }
    }

    /// Returns `ids` sorted, so result sets can be compared exactly without
    /// depending on the order in which hits were returned.
    fn sorted_ids(mut ids: Vec<String>) -> Vec<String> {
        ids.sort_unstable();
        ids
    }

    /// A keyword that appears in exactly one subject returns exactly that message.
    #[test]
    fn search_by_subject_keyword() {
        let mut idx = SearchIndex::in_memory().unwrap();
        let subjects = [
            "Deployment plan for v2.3",
            "Q1 Report review",
            "This Week in Rust #580",
            "Invoice #2847",
            "Team standup notes",
            "Summer trip planning",
            "PR review: fix auth",
            "HN Weekly Digest",
            "RustConf 2026 invite",
            "CI pipeline failures",
        ];
        let envelopes: Vec<Envelope> = subjects
            .iter()
            .enumerate()
            .map(|(i, subj)| make_envelope(subj, &format!("Snippet for msg {i}"), "Alice"))
            .collect();
        // Only the first subject contains the keyword searched for below.
        let target_id = envelopes[0].id.as_str();
        for env in &envelopes {
            idx.index_envelope(env).unwrap();
        }
        idx.commit().unwrap();

        let results = idx
            .search("deployment", 10, 0, SortOrder::Relevance)
            .unwrap();
        assert_eq!(results.results.len(), 1);
        assert_eq!(results.results[0].message_id, target_id);
    }

    /// With relevance ordering, a subject hit is expected ahead of a snippet hit.
    #[test]
    fn field_boost_ranking() {
        let mut idx = SearchIndex::in_memory().unwrap();

        let env_subject = make_envelope("Critical deployment issue", "Nothing special here", "Bob");
        let env_snippet = make_envelope("Regular update", "The deployment went well", "Carol");

        let subject_id = env_subject.id.as_str();

        idx.index_envelope(&env_subject).unwrap();
        idx.index_envelope(&env_snippet).unwrap();
        idx.commit().unwrap();

        let results = idx
            .search("deployment", 10, 0, SortOrder::Relevance)
            .unwrap();
        assert_eq!(results.results.len(), 2);
        // Subject match should rank higher due to 3.0 boost vs 1.0 snippet
        assert_eq!(results.results[0].message_id, subject_id);
    }

    /// A word that only occurs in the body becomes searchable once the body is indexed.
    #[test]
    fn body_indexing() {
        let mut idx = SearchIndex::in_memory().unwrap();

        let env = make_envelope("Meeting notes", "Quick summary", "Alice");
        let env_id = env.id.as_str();

        idx.index_envelope(&env).unwrap();
        idx.commit().unwrap();

        // Search for body-only keyword should find nothing yet
        let results = idx.search("canary", 10, 0, SortOrder::Relevance).unwrap();
        assert_eq!(results.results.len(), 0);

        // Now index with body
        let body = MessageBody {
            message_id: env.id.clone(),
            text_plain: Some("Deploy canary to 5% of traffic first".to_string()),
            text_html: None,
            attachments: vec![],
            fetched_at: chrono::Utc::now(),
            metadata: MessageMetadata::default(),
        };
        idx.index_body(&env, &body).unwrap();
        idx.commit().unwrap();

        let results = idx.search("canary", 10, 0, SortOrder::Relevance).unwrap();
        assert_eq!(results.results.len(), 1);
        assert_eq!(results.results[0].message_id, env_id);
    }

    /// A removed document no longer shows up once the removal is committed.
    #[test]
    fn remove_document() {
        let mut idx = SearchIndex::in_memory().unwrap();

        let env = make_envelope("Remove me", "This should be gone", "Alice");
        idx.index_envelope(&env).unwrap();
        idx.commit().unwrap();

        let results = idx.search("remove", 10, 0, SortOrder::Relevance).unwrap();
        assert_eq!(results.results.len(), 1);

        idx.remove_document(&env.id);
        idx.commit().unwrap();

        let results = idx.search("remove", 10, 0, SortOrder::Relevance).unwrap();
        assert_eq!(results.results.len(), 0);
    }

    /// Searching an index with no documents succeeds and yields no results.
    #[test]
    fn empty_search() {
        let idx = SearchIndex::in_memory().unwrap();
        let results = idx
            .search("nonexistent", 10, 0, SortOrder::Relevance)
            .unwrap();
        assert!(results.results.is_empty());
    }

    /// `DateDesc` is expected to list the newest message first and to place a
    /// message dated far in the future after the genuinely recent ones.
    #[test]
    fn date_desc_search_returns_newest_first_and_sinks_future_dates() {
        let mut idx = SearchIndex::in_memory().unwrap();

        // One reference instant keeps the three dates exactly 2 and 400 days apart.
        let now = chrono::Utc::now();
        let mut newest = make_envelope("crates.io newest", "release", "Alice");
        newest.date = now;
        let mut older = make_envelope("crates.io older", "release", "Bob");
        older.date = now - chrono::Duration::days(2);
        let mut poisoned_future = make_envelope("crates.io future", "release", "Mallory");
        poisoned_future.date = now + chrono::Duration::days(400);

        // Indexed out of date order on purpose so insertion order cannot pass the test.
        idx.index_envelope(&older).unwrap();
        idx.index_envelope(&poisoned_future).unwrap();
        idx.index_envelope(&newest).unwrap();
        idx.commit().unwrap();

        let results = idx.search("crates.io", 10, 0, SortOrder::DateDesc).unwrap();
        let ids: Vec<String> = results
            .results
            .into_iter()
            .map(|result| result.message_id)
            .collect();

        // Comparing the whole vector checks the hit count and the order at once, and
        // a short result set fails with a readable diff instead of an index panic.
        assert_eq!(
            ids,
            vec![
                newest.id.as_str(),
                older.id.as_str(),
                poisoned_future.id.as_str(),
            ]
        );
    }

    /// Five matches with a page size of two: the first two pages are both full and
    /// both report that more results remain.
    #[test]
    fn search_paginates_with_offset_and_has_more() {
        let mut idx = SearchIndex::in_memory().unwrap();
        for i in 0..5 {
            let mut env = make_envelope(&format!("deployment {i}"), "rollout", "Alice");
            env.date = chrono::Utc::now() - chrono::Duration::minutes(i);
            idx.index_envelope(&env).unwrap();
        }
        idx.commit().unwrap();

        let first_page = idx.search("deployment", 2, 0, SortOrder::DateDesc).unwrap();
        let second_page = idx.search("deployment", 2, 2, SortOrder::DateDesc).unwrap();

        assert_eq!(first_page.results.len(), 2);
        assert!(first_page.has_more);
        assert_eq!(second_page.results.len(), 2);
        assert!(second_page.has_more);
        assert_ne!(
            first_page.results[0].message_id,
            second_page.results[0].message_id
        );
    }

    // -- E2E: parse → build → search integration tests --

    /// Creates an in-memory index holding four committed envelopes and returns the
    /// envelopes alongside it so tests can refer to their ids by position:
    ///
    /// 0. Alice, unread, no attachment
    /// 1. Bob, read + starred, with attachment
    /// 2. Carol, read, no attachment
    /// 3. Alice, unread, with attachment
    fn build_e2e_index() -> (SearchIndex, Vec<Envelope>) {
        let mut idx = SearchIndex::in_memory().unwrap();
        let envelopes = vec![
            make_envelope_full(
                "Deployment plan for v2",
                "Rolling out to prod",
                "Alice",
                "alice@example.com",
                MessageFlags::empty(), // unread
                false,
            ),
            make_envelope_full(
                "Invoice #2847",
                "Payment due next week",
                "Bob",
                "bob@example.com",
                MessageFlags::READ | MessageFlags::STARRED,
                true, // has attachment
            ),
            make_envelope_full(
                "Team standup notes",
                "Sprint review action items",
                "Carol",
                "carol@example.com",
                MessageFlags::READ,
                false,
            ),
            make_envelope_full(
                "CI pipeline failures",
                "Build broken on main",
                "Alice",
                "alice@example.com",
                MessageFlags::empty(), // unread
                true,                  // has attachment
            ),
        ];
        for env in &envelopes {
            idx.index_envelope(env).unwrap();
        }
        idx.commit().unwrap();
        (idx, envelopes)
    }

    /// Parses `query_str`, builds a query from the AST against a freshly built
    /// schema, runs it (limit 10, offset 0, relevance order) and returns the ids of
    /// the hits in the order they were returned.
    ///
    /// Panics if parsing or searching fails.
    fn e2e_search(idx: &SearchIndex, query_str: &str) -> Vec<String> {
        let ast = parser::parse_query(query_str).unwrap();
        let schema = MxrSchema::build();
        let qb = QueryBuilder::new(&schema);
        let query = qb.build(&ast);
        idx.search_ast(query, 10, 0, SortOrder::Relevance)
            .unwrap()
            .results
            .into_iter()
            .map(|r| r.message_id)
            .collect()
    }

    #[test]
    fn e2e_parse_build_search_text() {
        let (idx, envelopes) = build_e2e_index();
        let results = e2e_search(&idx, "deployment");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], envelopes[0].id.as_str());
    }

    #[test]
    fn e2e_parse_build_search_field() {
        let (idx, envelopes) = build_e2e_index();
        let results = e2e_search(&idx, "from:alice@example.com");
        // Exact set comparison: also rejects the same message being returned twice.
        let alice_ids = sorted_ids(vec![envelopes[0].id.as_str(), envelopes[3].id.as_str()]);
        assert_eq!(sorted_ids(results), alice_ids);
    }

    #[test]
    fn e2e_parse_build_search_compound() {
        let (idx, envelopes) = build_e2e_index();
        // from:alice AND is:unread — both alice messages are unread
        let results = e2e_search(&idx, "from:alice@example.com is:unread");
        // Exact set comparison: also rejects the same message being returned twice.
        let alice_ids = sorted_ids(vec![envelopes[0].id.as_str(), envelopes[3].id.as_str()]);
        assert_eq!(sorted_ids(results), alice_ids);
    }

    #[test]
    fn e2e_parse_build_search_negation() {
        let (idx, _envelopes) = build_e2e_index();
        // -is:read = unread messages (alice's two)
        let results = e2e_search(&idx, "-is:read");
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn e2e_filter_has_attachment() {
        let (idx, envelopes) = build_e2e_index();
        let results = e2e_search(&idx, "has:attachment");
        // Exact set comparison: also rejects the same message being returned twice.
        let attachment_ids = sorted_ids(vec![envelopes[1].id.as_str(), envelopes[3].id.as_str()]);
        assert_eq!(sorted_ids(results), attachment_ids);
    }

    /// Every fixture carries the `notifications` label, so all of them match.
    #[test]
    fn e2e_search_by_label() {
        let (idx, envelopes) = build_e2e_index();
        let results = e2e_search(&idx, "label:notifications");
        assert_eq!(results.len(), envelopes.len());
    }

    #[test]
    fn e2e_search_by_label_is_case_insensitive() {
        let (idx, envelopes) = build_e2e_index();
        let results = e2e_search(&idx, "label:NOTIFICATIONS");
        assert_eq!(results.len(), envelopes.len());
    }

    #[test]
    fn e2e_filter_starred() {
        let (idx, envelopes) = build_e2e_index();
        let results = e2e_search(&idx, "is:starred");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], envelopes[1].id.as_str());
    }

    /// Every fixture shares the same cc and bcc recipient, so both queries match all.
    #[test]
    fn e2e_search_cc_and_bcc_fields() {
        let (idx, envelopes) = build_e2e_index();

        let cc_results = e2e_search(&idx, "cc:team@example.com");
        assert_eq!(cc_results.len(), envelopes.len());

        let bcc_results = e2e_search(&idx, "bcc:hidden@example.com");
        assert_eq!(bcc_results.len(), envelopes.len());
    }

    #[test]
    fn e2e_search_sent_filter() {
        let mut idx = SearchIndex::in_memory().unwrap();
        let sent = make_envelope_full(
            "Sent follow-up",
            "Done",
            "Alice",
            "alice@example.com",
            MessageFlags::READ | MessageFlags::SENT,
            false,
        );
        let inbox = make_envelope_full(
            "Inbox message",
            "Pending",
            "Bob",
            "bob@example.com",
            MessageFlags::READ,
            false,
        );
        idx.index_envelope(&sent).unwrap();
        idx.index_envelope(&inbox).unwrap();
        idx.commit().unwrap();

        let results = e2e_search(&idx, "is:sent");
        assert_eq!(results, vec![sent.id.as_str()]);
    }

    /// One message indexed together with its body is found via the `body:`,
    /// `filename:` and `size:` operators.
    #[test]
    fn e2e_search_size_and_body_and_filename() {
        let mut idx = SearchIndex::in_memory().unwrap();
        let env = make_envelope_full(
            "Release checklist",
            "Contains attachment",
            "Alice",
            "alice@example.com",
            MessageFlags::READ,
            true,
        );
        let body = MessageBody {
            message_id: env.id.clone(),
            text_plain: Some("Deploy canary to 10% before global rollout".to_string()),
            text_html: None,
            attachments: vec![AttachmentMeta {
                id: AttachmentId::new(),
                message_id: env.id.clone(),
                filename: "release-notes-v2.pdf".to_string(),
                mime_type: "application/pdf".to_string(),
                size_bytes: 10,
                local_path: None,
                provider_id: "att-1".to_string(),
            }],
            fetched_at: chrono::Utc::now(),
            metadata: MessageMetadata::default(),
        };

        idx.index_body(&env, &body).unwrap();
        idx.commit().unwrap();

        // `assert_eq!` only borrows its operands, so one expected vector serves all three.
        let expected = vec![env.id.as_str()];
        assert_eq!(e2e_search(&idx, "body:canary"), expected);
        assert_eq!(e2e_search(&idx, "filename:release-notes"), expected);
        // The fixture envelope is exactly 1000 bytes, the inclusive lower bound.
        assert_eq!(e2e_search(&idx, "size:>=1000"), expected);
    }
}