Skip to main content

engram/search/
mod.rs

//! Search functionality for Engram
//!
//! Implements:
//! - BM25 full-text search (RML-876)
//! - Fuzzy/typo-tolerant search (RML-877)
//! - Search result explanation (RML-878)
//! - Adaptive search strategy (RML-898)
//! - Hybrid search with Reciprocal Rank Fusion (RRF)
//! - Aggregation queries (RML-880)
//! - Search result reranking (RML-927)
//! - Search result caching with adaptive thresholds (ENG-36)
12
13mod aggregation;
14mod bm25;
15mod fuzzy;
16mod hybrid;
17mod metadata;
18mod rerank;
19pub mod result_cache;
20
21pub use aggregation::*;
22pub use bm25::*;
23pub use fuzzy::*;
24pub use hybrid::*;
25pub use metadata::*;
26pub use rerank::*;
27pub use result_cache::*;
28
29use crate::types::SearchStrategy;
30
31/// Analyze query to determine optimal search strategy (RML-898)
32pub fn select_search_strategy(query: &str) -> SearchStrategy {
33    let query = query.trim();
34    let word_count = query.split_whitespace().count();
35    let has_quotes = query.contains('"');
36    let has_operators = query.contains(':')
37        || query.contains(" AND ")
38        || query.contains(" OR ")
39        || query.contains(" NOT ");
40    let has_special = query.contains('*') || query.contains('?');
41
42    // Explicit search syntax → keyword only
43    if has_quotes || has_operators || has_special {
44        return SearchStrategy::KeywordOnly;
45    }
46
47    // Very short queries → keyword (faster, usually precise enough)
48    if word_count <= 2 {
49        return SearchStrategy::KeywordOnly;
50    }
51
52    // Long conceptual queries → semantic
53    if word_count >= 8 {
54        return SearchStrategy::SemanticOnly;
55    }
56
57    // Default → hybrid
58    SearchStrategy::Hybrid
59}
60
/// How duplicate hits are identified when search results from several result
/// sets are deduplicated.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum DedupeStrategy {
    /// Results sharing a memory ID are duplicates. This is the default and
    /// the fastest option.
    #[default]
    ById,
    /// Results sharing a content hash are duplicates, which also catches
    /// duplicate content stored under different IDs.
    ByContentHash,
}
70
71/// Configuration for search thresholds
72#[derive(Debug, Clone)]
73pub struct SearchConfig {
74    /// Word count threshold for short queries (keyword-only)
75    pub short_threshold: usize,
76    /// Word count threshold for long queries (semantic-only)
77    pub long_threshold: usize,
78    /// Minimum score to include in results
79    pub min_score: f32,
80    /// Weight for keyword score in hybrid search
81    pub keyword_weight: f32,
82    /// Weight for semantic score in hybrid search
83    pub semantic_weight: f32,
84    /// RRF constant (k parameter, default: 60)
85    /// Higher values favor lower-ranked results, lower values favor top results
86    pub rrf_k: f32,
87    /// Boost factor for project context memories when metadata.project_path matches cwd
88    pub project_context_boost: f32,
89    /// Current working directory for project context matching
90    pub project_context_path: Option<String>,
91    /// Deduplication strategy for hybrid search
92    pub dedupe_strategy: DedupeStrategy,
93}
94
95impl Default for SearchConfig {
96    fn default() -> Self {
97        Self {
98            short_threshold: 2,
99            long_threshold: 8,
100            min_score: 0.1,
101            keyword_weight: 0.4,
102            semantic_weight: 0.6,
103            rrf_k: 60.0,
104            project_context_boost: 0.2,
105            project_context_path: None,
106            dedupe_strategy: DedupeStrategy::default(),
107        }
108    }
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every query in `queries` is routed to `expected`.
    fn assert_routed(queries: &[&str], expected: SearchStrategy) {
        for query in queries {
            assert_eq!(
                select_search_strategy(query),
                expected,
                "unexpected strategy for query {:?}",
                query
            );
        }
    }

    #[test]
    fn test_strategy_selection() {
        // Short queries, quoted phrases and boolean operators → keyword
        assert_routed(
            &["auth", "jwt token", "\"exact phrase\"", "auth AND jwt"],
            SearchStrategy::KeywordOnly,
        );

        // Medium → hybrid
        assert_routed(
            &["how does authentication work"],
            SearchStrategy::Hybrid,
        );

        // Long → semantic
        assert_routed(
            &["explain the authentication flow with jwt tokens and refresh mechanism"],
            SearchStrategy::SemanticOnly,
        );
    }

    #[test]
    fn test_strategy_selection_explicit_syntax() {
        // Field separators, wildcards and the remaining operators force
        // keyword search even when the word count alone would not.
        assert_routed(
            &[
                "tag:rust memory leak",
                "auth* token refresh",
                "login flow OR signup flow",
                "jwt tokens NOT refresh",
            ],
            SearchStrategy::KeywordOnly,
        );
    }

    #[test]
    fn test_strategy_selection_word_count_boundaries() {
        // Empty and whitespace-only queries count as zero words → keyword.
        assert_routed(&["", "   ", "  auth  "], SearchStrategy::KeywordOnly);

        // Three and seven words sit just inside the hybrid band.
        assert_routed(
            &["jwt token refresh", "how does the jwt refresh flow work"],
            SearchStrategy::Hybrid,
        );

        // Eight words is the first count routed to semantic search.
        assert_routed(
            &["how does the jwt refresh token flow work"],
            SearchStrategy::SemanticOnly,
        );
    }
}