oxirs_vec/hybrid_search/
config.rs

1//! Configuration for hybrid search
2
3use super::types::SearchWeights;
4use serde::{Deserialize, Serialize};
5
6/// Hybrid search configuration
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct HybridSearchConfig {
9    /// Search mode
10    pub mode: SearchMode,
11    /// Keyword search algorithm
12    pub keyword_algorithm: KeywordAlgorithm,
13    /// Fusion strategy for combining results
14    pub fusion_strategy: RankFusionStrategy,
15    /// Default search weights
16    pub default_weights: SearchWeights,
17    /// Enable query expansion
18    pub enable_query_expansion: bool,
19    /// Maximum expanded terms
20    pub max_expanded_terms: usize,
21    /// Minimum keyword score threshold
22    pub min_keyword_score: f32,
23    /// Minimum semantic score threshold
24    pub min_semantic_score: f32,
25    /// Enable re-ranking
26    pub enable_reranking: bool,
27    /// Number of candidates for re-ranking
28    pub reranking_candidates: usize,
29}
30
31/// Search mode
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
33pub enum SearchMode {
34    /// Keyword search only
35    KeywordOnly,
36    /// Semantic search only
37    SemanticOnly,
38    /// Hybrid search (both keyword and semantic)
39    Hybrid,
40    /// Adaptive (automatically choose based on query)
41    Adaptive,
42}
43
44/// Keyword search algorithm
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
46pub enum KeywordAlgorithm {
47    /// BM25 (Okapi BM25)
48    Bm25,
49    /// TF-IDF (Term Frequency - Inverse Document Frequency)
50    Tfidf,
51    /// Combined (use both and take max)
52    Combined,
53}
54
55/// Strategy for fusing keyword and semantic results
56#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
57pub enum RankFusionStrategy {
58    /// Weighted sum of scores
59    WeightedSum,
60    /// Reciprocal Rank Fusion (RRF)
61    ReciprocalRankFusion,
62    /// Cascade (filter with keyword, re-rank with semantic)
63    Cascade,
64    /// Interleave results from both
65    Interleave,
66}
67
68impl Default for HybridSearchConfig {
69    fn default() -> Self {
70        Self {
71            mode: SearchMode::Hybrid,
72            keyword_algorithm: KeywordAlgorithm::Bm25,
73            fusion_strategy: RankFusionStrategy::ReciprocalRankFusion,
74            default_weights: SearchWeights::default(),
75            enable_query_expansion: true,
76            max_expanded_terms: 5,
77            min_keyword_score: 0.1,
78            min_semantic_score: 0.3,
79            enable_reranking: true,
80            reranking_candidates: 100,
81        }
82    }
83}
84
85impl HybridSearchConfig {
86    /// Validate configuration
87    pub fn validate(&self) -> anyhow::Result<()> {
88        self.default_weights.validate()?;
89
90        if self.max_expanded_terms == 0 {
91            anyhow::bail!("max_expanded_terms must be positive");
92        }
93
94        if self.min_keyword_score < 0.0 || self.min_keyword_score > 1.0 {
95            anyhow::bail!("min_keyword_score must be in [0.0, 1.0]");
96        }
97
98        if self.min_semantic_score < 0.0 || self.min_semantic_score > 1.0 {
99            anyhow::bail!("min_semantic_score must be in [0.0, 1.0]");
100        }
101
102        if self.reranking_candidates == 0 {
103            anyhow::bail!("reranking_candidates must be positive");
104        }
105
106        Ok(())
107    }
108
109    /// Create a keyword-only configuration
110    pub fn keyword_only() -> Self {
111        Self {
112            mode: SearchMode::KeywordOnly,
113            default_weights: SearchWeights {
114                keyword_weight: 1.0,
115                semantic_weight: 0.0,
116                recency_weight: 0.0,
117            },
118            ..Default::default()
119        }
120    }
121
122    /// Create a semantic-only configuration
123    pub fn semantic_only() -> Self {
124        Self {
125            mode: SearchMode::SemanticOnly,
126            default_weights: SearchWeights {
127                keyword_weight: 0.0,
128                semantic_weight: 1.0,
129                recency_weight: 0.0,
130            },
131            ..Default::default()
132        }
133    }
134
135    /// Create a balanced hybrid configuration
136    pub fn balanced() -> Self {
137        Self {
138            mode: SearchMode::Hybrid,
139            default_weights: SearchWeights {
140                keyword_weight: 0.5,
141                semantic_weight: 0.5,
142                recency_weight: 0.0,
143            },
144            fusion_strategy: RankFusionStrategy::ReciprocalRankFusion,
145            ..Default::default()
146        }
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing `f32` weights.
    const TOLERANCE: f32 = 0.001;

    #[test]
    fn test_default_config_is_valid() {
        let config = HybridSearchConfig::default();
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_keyword_only_config() {
        let config = HybridSearchConfig::keyword_only();
        assert_eq!(config.mode, SearchMode::KeywordOnly);
        assert!((config.default_weights.keyword_weight - 1.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_semantic_only_config() {
        let config = HybridSearchConfig::semantic_only();
        assert_eq!(config.mode, SearchMode::SemanticOnly);
        assert!((config.default_weights.semantic_weight - 1.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_balanced_config() {
        let config = HybridSearchConfig::balanced();
        assert_eq!(config.mode, SearchMode::Hybrid);
        assert!((config.default_weights.keyword_weight - 0.5).abs() < TOLERANCE);
        assert!((config.default_weights.semantic_weight - 0.5).abs() < TOLERANCE);
    }

    #[test]
    fn test_invalid_thresholds() {
        let config = HybridSearchConfig {
            min_keyword_score: 1.5,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_negative_semantic_threshold_is_rejected() {
        let config = HybridSearchConfig {
            min_semantic_score: -0.1,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_semantic_threshold_above_one_is_rejected() {
        let config = HybridSearchConfig {
            min_semantic_score: 1.5,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_threshold_boundaries_are_accepted() {
        // Both ends of the interval are inclusive.
        let config = HybridSearchConfig {
            min_keyword_score: 0.0,
            min_semantic_score: 1.0,
            ..Default::default()
        };
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_zero_max_expanded_terms_is_rejected() {
        let config = HybridSearchConfig {
            max_expanded_terms: 0,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_zero_reranking_candidates_is_rejected() {
        let config = HybridSearchConfig {
            reranking_candidates: 0,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }
}