1use crate::error::Result;
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12use std::{collections::HashMap, sync::Arc};
13use uuid::Uuid;
14
15use super::{
16 KeywordQuery, KeywordSearchEngine, KeywordSearchResult, SimilarityQuery, SimilarityResult,
17 VectorSearchEngine,
18};
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct HybridSearchEngineConfig {
23 pub semantic_weight: f32,
25 pub keyword_weight: f32,
27 pub rrf_k: f32,
29 pub default_limit: usize,
31 pub min_score_threshold: f32,
33 pub over_fetch_multiplier: usize,
35}
36
37impl Default for HybridSearchEngineConfig {
38 fn default() -> Self {
39 Self {
40 semantic_weight: 0.5,
41 keyword_weight: 0.5,
42 rrf_k: 60.0,
43 default_limit: 10,
44 min_score_threshold: 0.0,
45 over_fetch_multiplier: 3,
46 }
47 }
48}
49
50#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
52pub enum SearchMode {
53 SemanticOnly,
55 KeywordOnly,
57 #[default]
59 Hybrid,
60}
61
62#[derive(Debug, Clone, Default, Serialize, Deserialize)]
64pub struct MetadataFilters {
65 pub event_type: Option<String>,
67 pub entity_id: Option<String>,
69 pub time_from: Option<DateTime<Utc>>,
71 pub time_to: Option<DateTime<Utc>>,
73}
74
75impl MetadataFilters {
76 pub fn new() -> Self {
77 Self::default()
78 }
79
80 pub fn with_event_type(mut self, event_type: impl Into<String>) -> Self {
81 self.event_type = Some(event_type.into());
82 self
83 }
84
85 pub fn with_entity_id(mut self, entity_id: impl Into<String>) -> Self {
86 self.entity_id = Some(entity_id.into());
87 self
88 }
89
90 pub fn with_time_range(mut self, from: DateTime<Utc>, to: DateTime<Utc>) -> Self {
91 self.time_from = Some(from);
92 self.time_to = Some(to);
93 self
94 }
95
96 pub fn with_time_from(mut self, from: DateTime<Utc>) -> Self {
97 self.time_from = Some(from);
98 self
99 }
100
101 pub fn with_time_to(mut self, to: DateTime<Utc>) -> Self {
102 self.time_to = Some(to);
103 self
104 }
105
106 pub fn has_filters(&self) -> bool {
108 self.event_type.is_some()
109 || self.entity_id.is_some()
110 || self.time_from.is_some()
111 || self.time_to.is_some()
112 }
113}
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct SearchQuery {
118 pub query: String,
120 pub limit: usize,
122 pub tenant_id: Option<String>,
124 pub mode: SearchMode,
126 pub filters: MetadataFilters,
128 pub min_similarity: Option<f32>,
130}
131
132impl SearchQuery {
133 pub fn new(query: impl Into<String>) -> Self {
134 Self {
135 query: query.into(),
136 limit: 10,
137 tenant_id: None,
138 mode: SearchMode::Hybrid,
139 filters: MetadataFilters::default(),
140 min_similarity: None,
141 }
142 }
143
144 pub fn with_limit(mut self, limit: usize) -> Self {
145 self.limit = limit;
146 self
147 }
148
149 pub fn with_tenant(mut self, tenant_id: impl Into<String>) -> Self {
150 self.tenant_id = Some(tenant_id.into());
151 self
152 }
153
154 pub fn with_mode(mut self, mode: SearchMode) -> Self {
155 self.mode = mode;
156 self
157 }
158
159 pub fn with_filters(mut self, filters: MetadataFilters) -> Self {
160 self.filters = filters;
161 self
162 }
163
164 pub fn with_event_type(mut self, event_type: impl Into<String>) -> Self {
165 self.filters.event_type = Some(event_type.into());
166 self
167 }
168
169 pub fn with_entity_id(mut self, entity_id: impl Into<String>) -> Self {
170 self.filters.entity_id = Some(entity_id.into());
171 self
172 }
173
174 pub fn with_time_range(mut self, from: DateTime<Utc>, to: DateTime<Utc>) -> Self {
175 self.filters.time_from = Some(from);
176 self.filters.time_to = Some(to);
177 self
178 }
179
180 pub fn with_min_similarity(mut self, threshold: f32) -> Self {
181 self.min_similarity = Some(threshold);
182 self
183 }
184
185 pub fn semantic(query: impl Into<String>) -> Self {
187 Self::new(query).with_mode(SearchMode::SemanticOnly)
188 }
189
190 pub fn keyword(query: impl Into<String>) -> Self {
192 Self::new(query).with_mode(SearchMode::KeywordOnly)
193 }
194}
195
196#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct HybridSearchResult {
199 pub event_id: Uuid,
201 pub score: f32,
203 pub semantic_score: Option<f32>,
205 pub keyword_score: Option<f32>,
207 pub event_type: Option<String>,
209 pub entity_id: Option<String>,
211 pub source_text: Option<String>,
213}
214
215#[derive(Debug, Clone, Default)]
217pub struct EventMetadata {
218 pub event_type: Option<String>,
219 pub entity_id: Option<String>,
220 pub timestamp: Option<DateTime<Utc>>,
221}
222
223pub struct HybridSearchEngine {
231 config: HybridSearchEngineConfig,
232 vector_engine: Arc<VectorSearchEngine>,
233 keyword_engine: Arc<KeywordSearchEngine>,
234 metadata_cache: parking_lot::RwLock<HashMap<Uuid, EventMetadata>>,
236 stats: parking_lot::RwLock<EngineStats>,
238}
239
/// Running counters of the searches an engine has served.
#[derive(Debug, Default, Clone)]
struct EngineStats {
    /// Every call to `search`, regardless of mode.
    total_searches: u64,
    /// Calls made in `SearchMode::SemanticOnly`.
    semantic_searches: u64,
    /// Calls made in `SearchMode::KeywordOnly`.
    keyword_searches: u64,
    /// Calls made in `SearchMode::Hybrid`.
    hybrid_searches: u64,
}
247
248impl HybridSearchEngine {
249 pub fn new(
251 vector_engine: Arc<VectorSearchEngine>,
252 keyword_engine: Arc<KeywordSearchEngine>,
253 ) -> Self {
254 Self::with_config(
255 vector_engine,
256 keyword_engine,
257 HybridSearchEngineConfig::default(),
258 )
259 }
260
261 pub fn with_config(
263 vector_engine: Arc<VectorSearchEngine>,
264 keyword_engine: Arc<KeywordSearchEngine>,
265 config: HybridSearchEngineConfig,
266 ) -> Self {
267 Self {
268 config,
269 vector_engine,
270 keyword_engine,
271 metadata_cache: parking_lot::RwLock::new(HashMap::new()),
272 stats: parking_lot::RwLock::new(EngineStats::default()),
273 }
274 }
275
276 pub fn config(&self) -> &HybridSearchEngineConfig {
278 &self.config
279 }
280
281 pub fn store_metadata(&self, event_id: Uuid, metadata: EventMetadata) {
283 let mut cache = self.metadata_cache.write();
284 cache.insert(event_id, metadata);
285 }
286
287 #[cfg(any(feature = "vector-search", feature = "keyword-search"))]
292 pub async fn index_event(
293 &self,
294 event_id: Uuid,
295 tenant_id: &str,
296 event_type: &str,
297 entity_id: Option<&str>,
298 payload: &serde_json::Value,
299 timestamp: DateTime<Utc>,
300 ) -> Result<()> {
301 self.store_metadata(
303 event_id,
304 EventMetadata {
305 event_type: Some(event_type.to_string()),
306 entity_id: entity_id.map(std::string::ToString::to_string),
307 timestamp: Some(timestamp),
308 },
309 );
310
311 #[cfg(feature = "keyword-search")]
313 self.keyword_engine
314 .index_event(event_id, tenant_id, event_type, entity_id, payload)?;
315
316 #[cfg(feature = "vector-search")]
318 {
319 let embedding = self.vector_engine.embed_event(payload)?;
320 let source_text = extract_source_text(payload);
321 self.vector_engine
322 .index_event(event_id, tenant_id, embedding, source_text)
323 .await?;
324 }
325
326 Ok(())
327 }
328
329 #[cfg(not(any(feature = "vector-search", feature = "keyword-search")))]
331 pub async fn index_event(
332 &self,
333 event_id: Uuid,
334 _tenant_id: &str,
335 event_type: &str,
336 entity_id: Option<&str>,
337 _payload: &serde_json::Value,
338 timestamp: DateTime<Utc>,
339 ) -> Result<()> {
340 self.store_metadata(
342 event_id,
343 EventMetadata {
344 event_type: Some(event_type.to_string()),
345 entity_id: entity_id.map(str::to_string),
346 timestamp: Some(timestamp),
347 },
348 );
349 Ok(())
350 }
351
352 #[cfg(feature = "keyword-search")]
354 pub fn commit(&self) -> Result<()> {
355 self.keyword_engine.commit()
356 }
357
358 #[cfg(not(feature = "keyword-search"))]
359 pub fn commit(&self) -> Result<()> {
360 Ok(())
361 }
362
363 pub fn search(&self, query: &SearchQuery) -> Result<Vec<HybridSearchResult>> {
372 {
374 let mut stats = self.stats.write();
375 stats.total_searches += 1;
376 match query.mode {
377 SearchMode::SemanticOnly => stats.semantic_searches += 1,
378 SearchMode::KeywordOnly => stats.keyword_searches += 1,
379 SearchMode::Hybrid => stats.hybrid_searches += 1,
380 }
381 }
382
383 let fetch_limit = if query.filters.has_filters() {
385 query.limit * self.config.over_fetch_multiplier
386 } else {
387 query.limit
388 };
389
390 match query.mode {
391 SearchMode::SemanticOnly => self.search_semantic_only(query, fetch_limit),
392 SearchMode::KeywordOnly => self.search_keyword_only(query, fetch_limit),
393 SearchMode::Hybrid => self.search_hybrid(query, fetch_limit),
394 }
395 }
396
397 fn search_semantic_only(
399 &self,
400 query: &SearchQuery,
401 fetch_limit: usize,
402 ) -> Result<Vec<HybridSearchResult>> {
403 let semantic_results = self.run_semantic_search(query, fetch_limit)?;
404
405 let mut results: Vec<HybridSearchResult> = semantic_results
406 .into_iter()
407 .map(|r| HybridSearchResult {
408 event_id: r.event_id,
409 score: r.score,
410 semantic_score: Some(r.score),
411 keyword_score: None,
412 event_type: self.get_cached_event_type(r.event_id),
413 entity_id: self.get_cached_entity_id(r.event_id),
414 source_text: r.source_text,
415 })
416 .collect();
417
418 self.apply_filters(&mut results, &query.filters);
420
421 results.retain(|r| r.score >= self.config.min_score_threshold);
423
424 results.truncate(query.limit);
426
427 Ok(results)
428 }
429
430 fn search_keyword_only(
432 &self,
433 query: &SearchQuery,
434 fetch_limit: usize,
435 ) -> Result<Vec<HybridSearchResult>> {
436 let keyword_results = self.run_keyword_search(query, fetch_limit)?;
437
438 let max_score = keyword_results
440 .iter()
441 .map(|r| r.score)
442 .fold(0.0_f32, f32::max);
443
444 let mut results: Vec<HybridSearchResult> = keyword_results
445 .into_iter()
446 .map(|r| {
447 let normalized_score = if max_score > 0.0 {
448 r.score / max_score
449 } else {
450 0.0
451 };
452 HybridSearchResult {
453 event_id: r.event_id,
454 score: normalized_score,
455 semantic_score: None,
456 keyword_score: Some(r.score),
457 event_type: Some(r.event_type),
458 entity_id: r.entity_id,
459 source_text: None,
460 }
461 })
462 .collect();
463
464 self.apply_filters(&mut results, &query.filters);
466
467 results.retain(|r| r.score >= self.config.min_score_threshold);
469
470 results.truncate(query.limit);
472
473 Ok(results)
474 }
475
476 fn search_hybrid(
478 &self,
479 query: &SearchQuery,
480 fetch_limit: usize,
481 ) -> Result<Vec<HybridSearchResult>> {
482 let semantic_results = self.run_semantic_search(query, fetch_limit)?;
484 let keyword_results = self.run_keyword_search(query, fetch_limit)?;
485
486 let mut combined_scores: HashMap<Uuid, HybridSearchResult> = HashMap::new();
488
489 for (rank, result) in semantic_results.into_iter().enumerate() {
491 let rrf_score = 1.0 / (self.config.rrf_k + rank as f32 + 1.0);
492 let weighted_score = rrf_score * self.config.semantic_weight;
493
494 combined_scores.insert(
495 result.event_id,
496 HybridSearchResult {
497 event_id: result.event_id,
498 score: weighted_score,
499 semantic_score: Some(result.score),
500 keyword_score: None,
501 event_type: self.get_cached_event_type(result.event_id),
502 entity_id: self.get_cached_entity_id(result.event_id),
503 source_text: result.source_text,
504 },
505 );
506 }
507
508 for (rank, result) in keyword_results.into_iter().enumerate() {
510 let rrf_score = 1.0 / (self.config.rrf_k + rank as f32 + 1.0);
511 let weighted_score = rrf_score * self.config.keyword_weight;
512
513 if let Some(existing) = combined_scores.get_mut(&result.event_id) {
514 existing.score += weighted_score;
516 existing.keyword_score = Some(result.score);
517 existing.event_type = Some(result.event_type);
519 existing.entity_id = result.entity_id;
520 } else {
521 combined_scores.insert(
522 result.event_id,
523 HybridSearchResult {
524 event_id: result.event_id,
525 score: weighted_score,
526 semantic_score: None,
527 keyword_score: Some(result.score),
528 event_type: Some(result.event_type),
529 entity_id: result.entity_id,
530 source_text: None,
531 },
532 );
533 }
534 }
535
536 let mut results: Vec<HybridSearchResult> = combined_scores.into_values().collect();
538 results.sort_by(|a, b| {
539 b.score
540 .partial_cmp(&a.score)
541 .unwrap_or(std::cmp::Ordering::Equal)
542 });
543
544 self.apply_filters(&mut results, &query.filters);
546
547 results.retain(|r| r.score >= self.config.min_score_threshold);
549
550 results.truncate(query.limit);
552
553 Ok(results)
554 }
555
556 fn run_semantic_search(
558 &self,
559 query: &SearchQuery,
560 limit: usize,
561 ) -> Result<Vec<SimilarityResult>> {
562 let query_embedding = self.vector_engine.embed_text(&query.query)?;
564
565 let similarity_query = SimilarityQuery::new(query_embedding, limit)
566 .with_min_similarity(query.min_similarity.unwrap_or(0.0));
567
568 let similarity_query = if let Some(ref tenant_id) = query.tenant_id {
569 similarity_query.with_tenant(tenant_id.clone())
570 } else {
571 similarity_query
572 };
573
574 self.vector_engine.search_similar(&similarity_query)
575 }
576
577 fn run_keyword_search(
579 &self,
580 query: &SearchQuery,
581 limit: usize,
582 ) -> Result<Vec<KeywordSearchResult>> {
583 let keyword_query = KeywordQuery::new(&query.query).with_limit(limit);
584
585 let keyword_query = if let Some(ref tenant_id) = query.tenant_id {
586 keyword_query.with_tenant(tenant_id)
587 } else {
588 keyword_query
589 };
590
591 self.keyword_engine.search_keywords(&keyword_query)
592 }
593
594 fn apply_filters(&self, results: &mut Vec<HybridSearchResult>, filters: &MetadataFilters) {
596 if !filters.has_filters() {
597 return;
598 }
599
600 let metadata_cache = self.metadata_cache.read();
601
602 results.retain(|result| {
603 let cached = metadata_cache.get(&result.event_id);
605
606 if let Some(ref filter_type) = filters.event_type {
608 let event_type = result
609 .event_type
610 .as_ref()
611 .or_else(|| cached.and_then(|m| m.event_type.as_ref()));
612 match event_type {
613 Some(t) if t == filter_type => {}
614 _ => return false,
615 }
616 }
617
618 if let Some(ref filter_entity) = filters.entity_id {
620 let entity_id = result
621 .entity_id
622 .as_ref()
623 .or_else(|| cached.and_then(|m| m.entity_id.as_ref()));
624 match entity_id {
625 Some(e) if e == filter_entity => {}
626 _ => return false,
627 }
628 }
629
630 if filters.time_from.is_some() || filters.time_to.is_some() {
632 if let Some(timestamp) = cached.and_then(|m| m.timestamp) {
633 if let Some(from) = filters.time_from
634 && timestamp < from
635 {
636 return false;
637 }
638 if let Some(to) = filters.time_to
639 && timestamp > to
640 {
641 return false;
642 }
643 } else {
644 return false;
646 }
647 }
648
649 true
650 });
651 }
652
653 fn get_cached_event_type(&self, event_id: Uuid) -> Option<String> {
655 self.metadata_cache
656 .read()
657 .get(&event_id)
658 .and_then(|m| m.event_type.clone())
659 }
660
661 fn get_cached_entity_id(&self, event_id: Uuid) -> Option<String> {
663 self.metadata_cache
664 .read()
665 .get(&event_id)
666 .and_then(|m| m.entity_id.clone())
667 }
668
669 pub fn stats(&self) -> (u64, u64, u64, u64) {
671 let stats = self.stats.read();
672 (
673 stats.total_searches,
674 stats.semantic_searches,
675 stats.keyword_searches,
676 stats.hybrid_searches,
677 )
678 }
679
680 pub fn health_check(&self) -> Result<()> {
682 self.vector_engine.health_check()?;
683 self.keyword_engine.health_check()?;
684 Ok(())
685 }
686
687 pub fn cached_metadata_count(&self) -> usize {
689 self.metadata_cache.read().len()
690 }
691
692 pub fn clear_metadata_cache(&self) {
694 self.metadata_cache.write().clear();
695 }
696}
697
/// Picks the text to store alongside an event's embedding.
///
/// For an object payload, returns the first non-empty string found under
/// `content`, `text`, `body`, `message`, `description`, `title` or `name`, in
/// that order. Otherwise returns the payload serialized as JSON. The result is
/// therefore always `Some`.
#[cfg(feature = "vector-search")]
fn extract_source_text(payload: &serde_json::Value) -> Option<String> {
    const PRIORITY_FIELDS: [&str; 7] = [
        "content",
        "text",
        "body",
        "message",
        "description",
        "title",
        "name",
    ];

    let preferred_text = payload.as_object().and_then(|fields| {
        PRIORITY_FIELDS
            .iter()
            .find_map(|name| match fields.get(*name) {
                Some(serde_json::Value::String(text)) if !text.is_empty() => Some(text.clone()),
                _ => None,
            })
    });

    // Fall back to the whole payload rendered as JSON.
    Some(preferred_text.unwrap_or_else(|| payload.to_string()))
}
724
#[cfg(test)]
mod tests {
    use super::*;

    /// Config fixture with every field spelled out, independent of `Default`.
    fn create_test_config() -> HybridSearchEngineConfig {
        HybridSearchEngineConfig {
            semantic_weight: 0.5,
            keyword_weight: 0.5,
            rrf_k: 60.0,
            default_limit: 10,
            min_score_threshold: 0.0,
            over_fetch_multiplier: 3,
        }
    }

    /// Result fixture shared by the structure and serialization tests.
    fn sample_result(
        event_type: &str,
        entity_id: Option<&str>,
        source_text: Option<&str>,
    ) -> HybridSearchResult {
        HybridSearchResult {
            event_id: Uuid::new_v4(),
            score: 0.85,
            semantic_score: Some(0.9),
            keyword_score: Some(0.8),
            event_type: Some(event_type.to_string()),
            entity_id: entity_id.map(str::to_string),
            source_text: source_text.map(str::to_string),
        }
    }

    #[test]
    fn test_config_default() {
        let HybridSearchEngineConfig {
            semantic_weight,
            keyword_weight,
            rrf_k,
            default_limit,
            ..
        } = HybridSearchEngineConfig::default();

        assert_eq!(semantic_weight, 0.5);
        assert_eq!(keyword_weight, 0.5);
        assert_eq!(rrf_k, 60.0);
        assert_eq!(default_limit, 10);
    }

    #[test]
    fn test_search_query_builder() {
        let query = SearchQuery::new("test query")
            .with_limit(20)
            .with_tenant("tenant-1")
            .with_mode(SearchMode::Hybrid)
            .with_event_type("UserCreated")
            .with_entity_id("user-123")
            .with_min_similarity(0.7);

        assert_eq!(query.query, "test query");
        assert_eq!(query.limit, 20);
        assert_eq!(query.tenant_id.as_deref(), Some("tenant-1"));
        assert_eq!(query.mode, SearchMode::Hybrid);
        assert_eq!(query.filters.event_type.as_deref(), Some("UserCreated"));
        assert_eq!(query.filters.entity_id.as_deref(), Some("user-123"));
        assert_eq!(query.min_similarity, Some(0.7));
    }

    #[test]
    fn test_search_query_semantic_constructor() {
        let mode = SearchQuery::semantic("natural language query").mode;
        assert_eq!(mode, SearchMode::SemanticOnly);
    }

    #[test]
    fn test_search_query_keyword_constructor() {
        let mode = SearchQuery::keyword("keyword search").mode;
        assert_eq!(mode, SearchMode::KeywordOnly);
    }

    #[test]
    fn test_metadata_filters_builder() {
        let now = Utc::now();
        let past = now - chrono::Duration::hours(1);

        let filters = MetadataFilters::new()
            .with_event_type("OrderPlaced")
            .with_entity_id("order-456")
            .with_time_range(past, now);

        assert_eq!(filters.event_type.as_deref(), Some("OrderPlaced"));
        assert_eq!(filters.entity_id.as_deref(), Some("order-456"));
        assert!(filters.time_from.is_some());
        assert!(filters.time_to.is_some());
        assert!(filters.has_filters());
    }

    #[test]
    fn test_metadata_filters_has_filters() {
        assert!(!MetadataFilters::new().has_filters());

        // Any single criterion is enough to count as "filtered".
        let single_criterion = [
            MetadataFilters::new().with_event_type("Test"),
            MetadataFilters::new().with_entity_id("e1"),
            MetadataFilters::new().with_time_from(Utc::now()),
        ];
        for filters in &single_criterion {
            assert!(filters.has_filters());
        }
    }

    #[test]
    fn test_search_mode_default() {
        assert_eq!(SearchMode::default(), SearchMode::Hybrid);
    }

    #[test]
    fn test_hybrid_search_result_structure() {
        let result = sample_result("UserCreated", Some("user-123"), Some("Test content"));

        assert!(result.score > 0.0);
        assert!(result.semantic_score.is_some());
        assert!(result.keyword_score.is_some());
    }

    #[test]
    fn test_event_metadata_default() {
        let EventMetadata {
            event_type,
            entity_id,
            timestamp,
        } = EventMetadata::default();

        assert!(event_type.is_none());
        assert!(entity_id.is_none());
        assert!(timestamp.is_none());
    }

    #[test]
    fn test_rrf_score_calculation() {
        let k = 60.0_f32;
        // Same formula the engine uses: 1 / (k + rank + 1).
        let rrf = |rank: f32| 1.0 / (k + rank + 1.0);

        let score_rank_0 = rrf(0.0);
        assert!((score_rank_0 - 0.01639344).abs() < 0.0001);

        let score_rank_1 = rrf(1.0);
        assert!((score_rank_1 - 0.01612903).abs() < 0.0001);

        // A better rank must always score higher.
        assert!(score_rank_0 > score_rank_1);
    }

    #[test]
    fn test_config_weights_validation() {
        let config = create_test_config();
        let weight_sum = config.semantic_weight + config.keyword_weight;

        assert!((weight_sum - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_search_query_with_time_range() {
        let now = Utc::now();
        let past = now - chrono::Duration::days(7);

        let filters = SearchQuery::new("test").with_time_range(past, now).filters;

        assert_eq!(filters.time_from, Some(past));
        assert_eq!(filters.time_to, Some(now));
    }

    #[test]
    fn test_search_query_serialization() {
        let query = SearchQuery::new("test query")
            .with_limit(5)
            .with_tenant("tenant-1")
            .with_mode(SearchMode::Hybrid);

        let json = serde_json::to_string(&query).expect("SearchQuery should serialize");
        let deserialized: SearchQuery =
            serde_json::from_str(&json).expect("SearchQuery should deserialize");

        assert_eq!(deserialized.query, "test query");
        assert_eq!(deserialized.limit, 5);
    }

    #[test]
    fn test_hybrid_search_result_serialization() {
        let result = sample_result("Test", None, None);

        assert!(serde_json::to_string(&result).is_ok());
    }
}
916
#[cfg(all(test, feature = "vector-search", feature = "keyword-search"))]
mod integration_tests {
    use super::*;
    use crate::infrastructure::search::{KeywordSearchEngineConfig, VectorSearchEngineConfig};

    /// Tenant used by every indexed event and query in this module.
    const TENANT: &str = "tenant-1";

    /// Builds real backends: a vector engine with no similarity floor and an
    /// auto-committing keyword engine.
    fn create_test_engines() -> (Arc<VectorSearchEngine>, Arc<KeywordSearchEngine>) {
        let vector_config = VectorSearchEngineConfig {
            default_similarity_threshold: 0.0,
            ..Default::default()
        };
        let keyword_config = KeywordSearchEngineConfig {
            auto_commit: true,
            ..Default::default()
        };

        let vector_engine = Arc::new(VectorSearchEngine::with_config(vector_config).unwrap());
        let keyword_engine = Arc::new(KeywordSearchEngine::with_config(keyword_config).unwrap());

        (vector_engine, keyword_engine)
    }

    /// Hybrid engine with default configuration over fresh test backends.
    fn create_hybrid_engine() -> HybridSearchEngine {
        let (vector_engine, keyword_engine) = create_test_engines();
        HybridSearchEngine::new(vector_engine, keyword_engine)
    }

    /// Indexes one event under [`TENANT`], panicking on failure.
    async fn index(
        engine: &HybridSearchEngine,
        event_id: Uuid,
        event_type: &str,
        entity_id: Option<&str>,
        payload: serde_json::Value,
        timestamp: DateTime<Utc>,
    ) {
        engine
            .index_event(event_id, TENANT, event_type, entity_id, &payload, timestamp)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn test_hybrid_search_integration() {
        let hybrid_engine = create_hybrid_engine();
        let (id1, id2) = (Uuid::new_v4(), Uuid::new_v4());

        index(
            &hybrid_engine,
            id1,
            "UserCreated",
            Some("user-123"),
            serde_json::json!({"name": "Alice", "email": "alice@example.com"}),
            Utc::now(),
        )
        .await;
        index(
            &hybrid_engine,
            id2,
            "OrderPlaced",
            Some("order-456"),
            serde_json::json!({"product": "Widget", "quantity": 5}),
            Utc::now(),
        )
        .await;

        let query = SearchQuery::new("Alice user").with_tenant(TENANT);
        let results = hybrid_engine.search(&query).unwrap();

        assert!(!results.is_empty());
        assert_eq!(results[0].event_id, id1);
    }

    #[tokio::test]
    async fn test_search_with_event_type_filter() {
        let hybrid_engine = create_hybrid_engine();
        let (id1, id2) = (Uuid::new_v4(), Uuid::new_v4());

        index(
            &hybrid_engine,
            id1,
            "UserCreated",
            None,
            serde_json::json!({"data": "test user"}),
            Utc::now(),
        )
        .await;
        index(
            &hybrid_engine,
            id2,
            "OrderPlaced",
            None,
            serde_json::json!({"data": "test order"}),
            Utc::now(),
        )
        .await;

        let query = SearchQuery::new("test")
            .with_tenant(TENANT)
            .with_event_type("UserCreated");
        let results = hybrid_engine.search(&query).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].event_type, Some("UserCreated".to_string()));
    }

    #[tokio::test]
    async fn test_search_with_time_range_filter() {
        let hybrid_engine = create_hybrid_engine();

        let now = Utc::now();
        let past = now - chrono::Duration::hours(2);
        let recent = now - chrono::Duration::minutes(30);
        let (id1, id2) = (Uuid::new_v4(), Uuid::new_v4());

        index(
            &hybrid_engine,
            id1,
            "Event",
            None,
            serde_json::json!({"data": "old event"}),
            past,
        )
        .await;
        index(
            &hybrid_engine,
            id2,
            "Event",
            None,
            serde_json::json!({"data": "recent event"}),
            recent,
        )
        .await;

        // Only the event indexed 30 minutes ago falls inside the last hour.
        let one_hour_ago = now - chrono::Duration::hours(1);
        let query = SearchQuery::new("event")
            .with_tenant(TENANT)
            .with_time_range(one_hour_ago, now);
        let results = hybrid_engine.search(&query).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].event_id, id2);
    }

    #[test]
    fn test_health_check() {
        let hybrid_engine = create_hybrid_engine();
        assert!(hybrid_engine.health_check().is_ok());
    }

    #[test]
    fn test_stats() {
        let hybrid_engine = create_hybrid_engine();

        // A freshly built engine has served no searches of any kind.
        let (total, semantic, keyword, hybrid) = hybrid_engine.stats();
        assert_eq!(total, 0);
        assert_eq!(semantic, 0);
        assert_eq!(keyword, 0);
        assert_eq!(hybrid, 0);
    }
}