Skip to main content

engram/search/
mod.rs

//! Search functionality for Engram
//!
//! Implements:
//! - BM25 full-text search (RML-876)
//! - Fuzzy/typo-tolerant search (RML-877)
//! - Search result explanation (RML-878)
//! - Adaptive search strategy (RML-898)
//! - Hybrid search with Reciprocal Rank Fusion (RRF)
//! - Aggregation queries (RML-880)
//! - Search result reranking (RML-927)
//! - Search result caching with adaptive thresholds (ENG-36)
12
13mod aggregation;
14mod bm25;
15pub mod explain;
16pub mod feedback;
17mod fuzzy;
18mod hybrid;
19mod metadata;
20pub mod mmr;
21mod rerank;
22pub mod result_cache;
23pub mod semantic_cache;
24pub mod utility;
25
26#[cfg(feature = "neural-rerank")]
27pub mod neural_rerank;
28
29pub use aggregation::*;
30pub use bm25::*;
31pub use explain::*;
32pub use fuzzy::*;
33pub use hybrid::*;
34pub use metadata::*;
35pub use mmr::*;
36pub use rerank::*;
37pub use result_cache::*;
38
39use crate::types::SearchStrategy;
40
41/// Analyze query to determine optimal search strategy (RML-898)
42pub fn select_search_strategy(query: &str) -> SearchStrategy {
43    let query = query.trim();
44    let word_count = query.split_whitespace().count();
45    let has_quotes = query.contains('"');
46    let has_operators = query.contains(':')
47        || query.contains(" AND ")
48        || query.contains(" OR ")
49        || query.contains(" NOT ");
50    let has_special = query.contains('*') || query.contains('?');
51
52    // Explicit search syntax → keyword only
53    if has_quotes || has_operators || has_special {
54        return SearchStrategy::KeywordOnly;
55    }
56
57    // Very short queries → keyword (faster, usually precise enough)
58    if word_count <= 2 {
59        return SearchStrategy::KeywordOnly;
60    }
61
62    // Long conceptual queries → semantic
63    if word_count >= 8 {
64        return SearchStrategy::SemanticOnly;
65    }
66
67    // Default → hybrid
68    SearchStrategy::Hybrid
69}
70
/// How duplicate hits are collapsed when search results from several result
/// sets are combined.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum DedupeStrategy {
    /// Treat results that share a memory ID as duplicates.
    ///
    /// This is the default and the fastest option.
    #[default]
    ById,
    /// Treat results that share a content hash as duplicates, which also
    /// catches identical content stored under different IDs.
    ByContentHash,
}
80
81/// Configuration for search thresholds
82#[derive(Debug, Clone)]
83pub struct SearchConfig {
84    /// Word count threshold for short queries (keyword-only)
85    pub short_threshold: usize,
86    /// Word count threshold for long queries (semantic-only)
87    pub long_threshold: usize,
88    /// Minimum score to include in results
89    pub min_score: f32,
90    /// Weight for keyword score in hybrid search
91    pub keyword_weight: f32,
92    /// Weight for semantic score in hybrid search
93    pub semantic_weight: f32,
94    /// RRF constant (k parameter, default: 60)
95    /// Higher values favor lower-ranked results, lower values favor top results
96    pub rrf_k: f32,
97    /// Boost factor for project context memories when metadata.project_path matches cwd
98    pub project_context_boost: f32,
99    /// Current working directory for project context matching
100    pub project_context_path: Option<String>,
101    /// Deduplication strategy for hybrid search
102    pub dedupe_strategy: DedupeStrategy,
103}
104
105impl Default for SearchConfig {
106    fn default() -> Self {
107        Self {
108            short_threshold: 2,
109            long_threshold: 8,
110            min_score: 0.1,
111            keyword_weight: 0.4,
112            semantic_weight: 0.6,
113            rrf_k: 60.0,
114            project_context_boost: 0.2,
115            project_context_path: None,
116            dedupe_strategy: DedupeStrategy::default(),
117        }
118    }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks one representative query per branch of
    /// `select_search_strategy`.
    #[test]
    fn test_strategy_selection() {
        let cases = [
            // Short queries → keyword
            ("auth", SearchStrategy::KeywordOnly),
            ("jwt token", SearchStrategy::KeywordOnly),
            // Quoted → keyword
            ("\"exact phrase\"", SearchStrategy::KeywordOnly),
            // Operators → keyword
            ("auth AND jwt", SearchStrategy::KeywordOnly),
            // Medium → hybrid
            ("how does authentication work", SearchStrategy::Hybrid),
            // Long → semantic
            (
                "explain the authentication flow with jwt tokens and refresh mechanism",
                SearchStrategy::SemanticOnly,
            ),
        ];

        for (query, expected) in cases {
            assert_eq!(
                select_search_strategy(query),
                expected,
                "unexpected strategy for query {query:?}"
            );
        }
    }
}