aurora_semantic/search/
query.rs

1//! Search query types and filters.
2
3use serde::{Deserialize, Serialize};
4use std::path::PathBuf;
5
6use crate::config::SearchMode;
7use crate::types::{ChunkType, Language};
8
9/// A search query with options.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct SearchQuery {
12    /// The search text.
13    pub text: String,
14    /// Search mode (lexical, semantic, or hybrid).
15    pub mode: SearchMode,
16    /// Maximum number of results.
17    pub limit: usize,
18    /// Minimum score threshold (0.0 to 1.0).
19    pub min_score: f32,
20    /// Optional filters.
21    pub filter: Option<SearchFilter>,
22}
23
24impl SearchQuery {
25    /// Create a new search query.
26    pub fn new(text: impl Into<String>) -> Self {
27        Self {
28            text: text.into(),
29            mode: SearchMode::Hybrid,
30            limit: 20,
31            min_score: 0.0,
32            filter: None,
33        }
34    }
35
36    /// Set the search mode.
37    pub fn mode(mut self, mode: SearchMode) -> Self {
38        self.mode = mode;
39        self
40    }
41
42    /// Set the result limit.
43    pub fn limit(mut self, limit: usize) -> Self {
44        self.limit = limit;
45        self
46    }
47
48    /// Set the minimum score threshold.
49    pub fn min_score(mut self, score: f32) -> Self {
50        self.min_score = score.clamp(0.0, 1.0);
51        self
52    }
53
54    /// Set a filter.
55    pub fn filter(mut self, filter: SearchFilter) -> Self {
56        self.filter = Some(filter);
57        self
58    }
59
60    /// Extract search terms from the query text.
61    pub fn terms(&self) -> Vec<String> {
62        self.text
63            .split_whitespace()
64            .filter(|s| s.len() >= 2) // Skip very short terms
65            .map(|s| s.to_lowercase())
66            .collect()
67    }
68
69    /// Check if this is an empty query.
70    pub fn is_empty(&self) -> bool {
71        self.text.trim().is_empty()
72    }
73}
74
75/// Filters to apply to search results.
76#[derive(Debug, Clone, Default, Serialize, Deserialize)]
77pub struct SearchFilter {
78    /// Filter by languages.
79    pub languages: Option<Vec<Language>>,
80    /// Filter by chunk types.
81    pub chunk_types: Option<Vec<ChunkType>>,
82    /// Filter by file path patterns (glob).
83    pub path_patterns: Option<Vec<String>>,
84    /// Filter by symbol names (partial match).
85    pub symbol_names: Option<Vec<String>>,
86    /// Include only files in these directories.
87    pub directories: Option<Vec<PathBuf>>,
88    /// Exclude files in these directories.
89    pub exclude_directories: Option<Vec<PathBuf>>,
90}
91
92impl SearchFilter {
93    /// Create an empty filter.
94    pub fn new() -> Self {
95        Self::default()
96    }
97
98    /// Filter by languages.
99    pub fn languages(mut self, languages: Vec<Language>) -> Self {
100        self.languages = Some(languages);
101        self
102    }
103
104    /// Filter by chunk types.
105    pub fn chunk_types(mut self, types: Vec<ChunkType>) -> Self {
106        self.chunk_types = Some(types);
107        self
108    }
109
110    /// Filter by path patterns.
111    pub fn path_patterns(mut self, patterns: Vec<String>) -> Self {
112        self.path_patterns = Some(patterns);
113        self
114    }
115
116    /// Filter by symbol names.
117    pub fn symbol_names(mut self, names: Vec<String>) -> Self {
118        self.symbol_names = Some(names);
119        self
120    }
121
122    /// Include only files in specific directories.
123    pub fn in_directories(mut self, dirs: Vec<PathBuf>) -> Self {
124        self.directories = Some(dirs);
125        self
126    }
127
128    /// Exclude files in specific directories.
129    pub fn exclude_directories(mut self, dirs: Vec<PathBuf>) -> Self {
130        self.exclude_directories = Some(dirs);
131        self
132    }
133
134    /// Check if a chunk matches the filter.
135    pub fn matches(
136        &self,
137        chunk: &crate::types::Chunk,
138        document: &crate::types::Document,
139    ) -> bool {
140        // Check language filter
141        if let Some(ref languages) = self.languages {
142            if !languages.contains(&document.language) {
143                return false;
144            }
145        }
146
147        // Check chunk type filter
148        if let Some(ref types) = self.chunk_types {
149            if !types.contains(&chunk.chunk_type) {
150                return false;
151            }
152        }
153
154        // Check path pattern filter
155        if let Some(ref patterns) = self.path_patterns {
156            let path_str = document.relative_path.to_string_lossy();
157            let matches_pattern = patterns.iter().any(|pattern| {
158                glob::Pattern::new(pattern)
159                    .map(|p| p.matches(&path_str))
160                    .unwrap_or(false)
161            });
162            if !matches_pattern {
163                return false;
164            }
165        }
166
167        // Check symbol name filter
168        if let Some(ref names) = self.symbol_names {
169            if let Some(ref symbol) = chunk.symbol_name {
170                let symbol_lower = symbol.to_lowercase();
171                let matches_symbol = names
172                    .iter()
173                    .any(|n| symbol_lower.contains(&n.to_lowercase()));
174                if !matches_symbol {
175                    return false;
176                }
177            } else {
178                return false;
179            }
180        }
181
182        // Check directory inclusion
183        if let Some(ref dirs) = self.directories {
184            let in_dir = dirs.iter().any(|dir| {
185                document.relative_path.starts_with(dir)
186            });
187            if !in_dir {
188                return false;
189            }
190        }
191
192        // Check directory exclusion
193        if let Some(ref dirs) = self.exclude_directories {
194            let in_excluded = dirs.iter().any(|dir| {
195                document.relative_path.starts_with(dir)
196            });
197            if in_excluded {
198                return false;
199            }
200        }
201
202        true
203    }
204
205    /// Check if the filter is empty (no constraints).
206    pub fn is_empty(&self) -> bool {
207        self.languages.is_none()
208            && self.chunk_types.is_none()
209            && self.path_patterns.is_none()
210            && self.symbol_names.is_none()
211            && self.directories.is_none()
212            && self.exclude_directories.is_none()
213    }
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a one-line function chunk that carries no symbol name.
    fn sample_chunk() -> crate::types::Chunk {
        crate::types::Chunk {
            id: crate::types::ChunkId::new(),
            document_id: crate::types::DocumentId::new(),
            content: "test".to_string(),
            chunk_type: ChunkType::Function,
            start_line: 1,
            end_line: 1,
            start_byte: 0,
            end_byte: 4,
            symbol_name: None,
            parent_symbol: None,
        }
    }

    /// Build a document named `file_name` (absolute path `/file_name`) with the given language.
    fn sample_document(file_name: &str, language: Language) -> crate::types::Document {
        crate::types::Document {
            id: crate::types::DocumentId::new(),
            relative_path: PathBuf::from(file_name),
            absolute_path: PathBuf::from(format!("/{}", file_name)),
            language,
            content_hash: "hash".to_string(),
            size_bytes: 100,
            modified_at: chrono::Utc::now(),
        }
    }

    /// The builder methods store exactly what they were given.
    #[test]
    fn test_search_query() {
        let built = SearchQuery::new("fn main")
            .mode(SearchMode::Lexical)
            .limit(10)
            .min_score(0.5);

        assert_eq!(built.text, "fn main");
        assert_eq!(built.mode, SearchMode::Lexical);
        assert_eq!(built.limit, 10);
        assert_eq!(built.min_score, 0.5);
    }

    /// One-character words are dropped from the extracted terms.
    #[test]
    fn test_query_terms() {
        let extracted = SearchQuery::new("hello world a test").terms();

        assert_eq!(extracted.len(), 3); // "a" is too short
        for expected in &["hello", "world", "test"] {
            assert!(extracted.contains(&expected.to_string()));
        }
    }

    /// A language filter accepts matching documents and rejects the others.
    #[test]
    fn test_filter_language() {
        let rust_only = SearchFilter::new().languages(vec![Language::Rust]);
        let chunk = sample_chunk();

        let rust_doc = sample_document("test.rs", Language::Rust);
        let py_doc = sample_document("test.py", Language::Python);

        assert!(rust_only.matches(&chunk, &rust_doc));
        assert!(!rust_only.matches(&chunk, &py_doc));
    }
}