brainwires_rag/rag/client/search.rs
1//! Core search, indexing dispatch, statistics, and clear operations for [`RagClient`].
2
3use super::RagClient;
4use crate::rag::types::*;
5use anyhow::{Context, Result};
6use std::time::Instant;
7
8impl RagClient {
9 /// Index a codebase directory
10 ///
11 /// This automatically performs full indexing for new codebases or incremental
12 /// updates for previously indexed codebases.
13 ///
14 /// # Example
15 ///
16 /// ```ignore
17 /// use crate::rag::{RagClient, IndexRequest};
18 ///
19 /// # async fn example() -> anyhow::Result<()> {
20 /// let client = RagClient::new().await?;
21 ///
22 /// let request = IndexRequest {
23 /// path: "/path/to/code".to_string(),
24 /// project: Some("my-project".to_string()),
25 /// include_patterns: vec!["**/*.rs".to_string()],
26 /// exclude_patterns: vec!["**/target/**".to_string()],
27 /// max_file_size: 1_048_576,
28 /// };
29 ///
30 /// let response = client.index_codebase(request).await?;
31 /// println!("Indexed {} files in {} ms",
32 /// response.files_indexed,
33 /// response.duration_ms);
34 /// # Ok(())
35 /// # }
36 /// ```
37 pub async fn index_codebase(&self, request: IndexRequest) -> Result<IndexResponse> {
38 // Validate request
39 request.validate().map_err(|e| anyhow::anyhow!(e))?;
40
41 // Use the smart indexing logic without progress notifications
42 // Default cancellation token - not cancellable from this API
43 let cancel_token = tokio_util::sync::CancellationToken::new();
44 super::indexing::do_index_smart(
45 self,
46 request.path,
47 request.project,
48 request.include_patterns,
49 request.exclude_patterns,
50 request.max_file_size,
51 None, // No peer
52 None, // No progress token
53 cancel_token,
54 )
55 .await
56 }
57
58 /// Query the indexed codebase using semantic search
59 ///
60 /// # Example
61 ///
62 /// ```ignore
63 /// use crate::rag::{RagClient, QueryRequest};
64 ///
65 /// # async fn example() -> anyhow::Result<()> {
66 /// let client = RagClient::new().await?;
67 ///
68 /// let request = QueryRequest {
69 /// query: "authentication logic".to_string(),
70 /// path: None,
71 /// project: Some("my-project".to_string()),
72 /// limit: 10,
73 /// min_score: 0.7,
74 /// hybrid: true,
75 /// };
76 ///
77 /// let response = client.query_codebase(request).await?;
78 /// for result in response.results {
79 /// println!("Found in {}: {:.2}", result.file_path, result.score);
80 /// println!("{}", result.content);
81 /// }
82 /// # Ok(())
83 /// # }
84 /// ```
85 pub async fn query_codebase(&self, request: QueryRequest) -> Result<QueryResponse> {
86 request.validate().map_err(|e| anyhow::anyhow!(e))?;
87
88 // Check if the target path is dirty (if path filter is specified)
89 self.check_path_not_dirty(request.path.as_deref()).await?;
90
91 let start = Instant::now();
92
93 let query_embedding = self
94 .embedding_provider
95 .embed(&request.query)
96 .context("Failed to generate query embedding")?;
97
98 let original_threshold = request.min_score;
99 let mut threshold_used = original_threshold;
100 let mut threshold_lowered = false;
101
102 let mut results = self
103 .vector_db
104 .search(
105 query_embedding.clone(),
106 &request.query,
107 request.limit,
108 threshold_used,
109 request.project.clone(),
110 request.path.clone(),
111 request.hybrid,
112 )
113 .await
114 .context("Failed to search")?;
115
116 if results.is_empty() && original_threshold > 0.3 {
117 let fallback_thresholds = [0.6, 0.5, 0.4, 0.3];
118
119 for &threshold in &fallback_thresholds {
120 if threshold >= original_threshold {
121 continue;
122 }
123
124 results = self
125 .vector_db
126 .search(
127 query_embedding.clone(),
128 &request.query,
129 request.limit,
130 threshold,
131 request.project.clone(),
132 request.path.clone(),
133 request.hybrid,
134 )
135 .await
136 .context("Failed to search")?;
137
138 if !results.is_empty() {
139 threshold_used = threshold;
140 threshold_lowered = true;
141 break;
142 }
143 }
144 }
145
146 Ok(QueryResponse {
147 results,
148 duration_ms: start.elapsed().as_millis() as u64,
149 threshold_used,
150 threshold_lowered,
151 })
152 }
153
154 /// Advanced search with filters for file type, language, and path patterns
155 pub async fn search_with_filters(
156 &self,
157 request: AdvancedSearchRequest,
158 ) -> Result<QueryResponse> {
159 request.validate().map_err(|e| anyhow::anyhow!(e))?;
160
161 // Check if the target path is dirty (if path filter is specified)
162 self.check_path_not_dirty(request.path.as_deref()).await?;
163
164 let start = Instant::now();
165
166 let query_embedding = self
167 .embedding_provider
168 .embed(&request.query)
169 .context("Failed to generate query embedding")?;
170
171 let original_threshold = request.min_score;
172 let mut threshold_used = original_threshold;
173 let mut threshold_lowered = false;
174
175 let mut results = self
176 .vector_db
177 .search_filtered(
178 query_embedding.clone(),
179 &request.query,
180 request.limit,
181 threshold_used,
182 request.project.clone(),
183 request.path.clone(),
184 true,
185 request.file_extensions.clone(),
186 request.languages.clone(),
187 request.path_patterns.clone(),
188 )
189 .await
190 .context("Failed to search with filters")?;
191
192 // Adaptive threshold lowering if no results found
193 if results.is_empty() && original_threshold > 0.3 {
194 let fallback_thresholds = [0.6, 0.5, 0.4, 0.3];
195
196 for &threshold in &fallback_thresholds {
197 if threshold >= original_threshold {
198 continue;
199 }
200
201 results = self
202 .vector_db
203 .search_filtered(
204 query_embedding.clone(),
205 &request.query,
206 request.limit,
207 threshold,
208 request.project.clone(),
209 request.path.clone(),
210 true,
211 request.file_extensions.clone(),
212 request.languages.clone(),
213 request.path_patterns.clone(),
214 )
215 .await
216 .context("Failed to search with filters")?;
217
218 if !results.is_empty() {
219 threshold_used = threshold;
220 threshold_lowered = true;
221 break;
222 }
223 }
224 }
225
226 Ok(QueryResponse {
227 results,
228 duration_ms: start.elapsed().as_millis() as u64,
229 threshold_used,
230 threshold_lowered,
231 })
232 }
233
234 /// Get statistics about the indexed codebase
235 pub async fn get_statistics(&self) -> Result<StatisticsResponse> {
236 let stats = self
237 .vector_db
238 .get_statistics()
239 .await
240 .context("Failed to get statistics")?;
241
242 let language_breakdown = stats
243 .language_breakdown
244 .into_iter()
245 .map(|(language, count)| LanguageStats {
246 language,
247 file_count: count,
248 chunk_count: count,
249 })
250 .collect();
251
252 Ok(StatisticsResponse {
253 total_files: stats.total_points,
254 total_chunks: stats.total_vectors,
255 total_embeddings: stats.total_vectors,
256 database_size_bytes: 0,
257 language_breakdown,
258 })
259 }
260
261 /// Clear all indexed data from the vector database and hash cache
262 pub async fn clear_index(&self) -> Result<ClearResponse> {
263 match self.vector_db.clear().await {
264 Ok(_) => {
265 // Clear hash cache (both roots and dirty_roots)
266 let mut cache = self.hash_cache.write().await;
267 cache.roots.clear();
268 cache.dirty_roots.clear();
269
270 // Delete cache file directly for robustness (in case save fails)
271 if self.cache_path.exists() {
272 if let Err(e) = std::fs::remove_file(&self.cache_path) {
273 tracing::warn!("Failed to delete hash cache file: {}", e);
274 } else {
275 tracing::info!("Deleted hash cache file: {:?}", self.cache_path);
276 }
277 }
278
279 // Save empty cache (recreates the file with empty state)
280 if let Err(e) = cache.save(&self.cache_path) {
281 tracing::warn!("Failed to save cleared cache: {}", e);
282 }
283
284 // Also clear git cache
285 let mut git_cache = self.git_cache.write().await;
286 git_cache.repos.clear();
287 if self.git_cache_path.exists() {
288 if let Err(e) = std::fs::remove_file(&self.git_cache_path) {
289 tracing::warn!("Failed to delete git cache file: {}", e);
290 } else {
291 tracing::info!("Deleted git cache file: {:?}", self.git_cache_path);
292 }
293 }
294 if let Err(e) = git_cache.save(&self.git_cache_path) {
295 tracing::warn!("Failed to save cleared git cache: {}", e);
296 }
297
298 if let Err(e) = self
299 .vector_db
300 .initialize(self.embedding_provider.dimension())
301 .await
302 {
303 Ok(ClearResponse {
304 success: false,
305 message: format!("Cleared but failed to reinitialize: {}", e),
306 })
307 } else {
308 Ok(ClearResponse {
309 success: true,
310 message: "Successfully cleared all indexed data and cache".to_string(),
311 })
312 }
313 }
314 Err(e) => Ok(ClearResponse {
315 success: false,
316 message: format!("Failed to clear index: {}", e),
317 }),
318 }
319 }
320
321 /// Search git commit history using semantic search
322 ///
323 /// # Example
324 ///
325 /// ```ignore
326 /// use crate::rag::{RagClient, SearchGitHistoryRequest};
327 ///
328 /// # async fn example() -> anyhow::Result<()> {
329 /// let client = RagClient::new().await?;
330 ///
331 /// let request = SearchGitHistoryRequest {
332 /// query: "bug fix authentication".to_string(),
333 /// path: "/path/to/repo".to_string(),
334 /// project: None,
335 /// branch: None,
336 /// max_commits: 100,
337 /// limit: 10,
338 /// min_score: 0.7,
339 /// author: None,
340 /// since: None,
341 /// until: None,
342 /// file_pattern: None,
343 /// };
344 ///
345 /// let response = client.search_git_history(request).await?;
346 /// for result in response.results {
347 /// println!("Commit {}: {}", result.commit_hash, result.commit_message);
348 /// }
349 /// # Ok(())
350 /// # }
351 /// ```
352 pub async fn search_git_history(
353 &self,
354 request: SearchGitHistoryRequest,
355 ) -> Result<SearchGitHistoryResponse> {
356 // Validate request
357 request.validate().map_err(|e| anyhow::anyhow!(e))?;
358
359 // Forward to git indexing implementation
360 super::git_indexing::do_search_git_history(
361 self.embedding_provider.clone(),
362 self.vector_db.clone(),
363 self.git_cache.clone(),
364 &self.git_cache_path,
365 request,
366 )
367 .await
368 }
369}