Skip to main content

brainwires_rag/rag/client/
search.rs

1//! Core search, indexing dispatch, statistics, and clear operations for [`RagClient`].
2
3use super::RagClient;
4use crate::rag::types::*;
5use anyhow::{Context, Result};
6use std::time::Instant;
7
8impl RagClient {
9    /// Index a codebase directory
10    ///
11    /// This automatically performs full indexing for new codebases or incremental
12    /// updates for previously indexed codebases.
13    ///
14    /// # Example
15    ///
16    /// ```ignore
17    /// use crate::rag::{RagClient, IndexRequest};
18    ///
19    /// # async fn example() -> anyhow::Result<()> {
20    /// let client = RagClient::new().await?;
21    ///
22    /// let request = IndexRequest {
23    ///     path: "/path/to/code".to_string(),
24    ///     project: Some("my-project".to_string()),
25    ///     include_patterns: vec!["**/*.rs".to_string()],
26    ///     exclude_patterns: vec!["**/target/**".to_string()],
27    ///     max_file_size: 1_048_576,
28    /// };
29    ///
30    /// let response = client.index_codebase(request).await?;
31    /// println!("Indexed {} files in {} ms",
32    ///          response.files_indexed,
33    ///          response.duration_ms);
34    /// # Ok(())
35    /// # }
36    /// ```
37    pub async fn index_codebase(&self, request: IndexRequest) -> Result<IndexResponse> {
38        // Validate request
39        request.validate().map_err(|e| anyhow::anyhow!(e))?;
40
41        // Use the smart indexing logic without progress notifications
42        // Default cancellation token - not cancellable from this API
43        let cancel_token = tokio_util::sync::CancellationToken::new();
44        super::indexing::do_index_smart(
45            self,
46            request.path,
47            request.project,
48            request.include_patterns,
49            request.exclude_patterns,
50            request.max_file_size,
51            None, // No peer
52            None, // No progress token
53            cancel_token,
54        )
55        .await
56    }
57
58    /// Query the indexed codebase using semantic search
59    ///
60    /// # Example
61    ///
62    /// ```ignore
63    /// use crate::rag::{RagClient, QueryRequest};
64    ///
65    /// # async fn example() -> anyhow::Result<()> {
66    /// let client = RagClient::new().await?;
67    ///
68    /// let request = QueryRequest {
69    ///     query: "authentication logic".to_string(),
70    ///     path: None,
71    ///     project: Some("my-project".to_string()),
72    ///     limit: 10,
73    ///     min_score: 0.7,
74    ///     hybrid: true,
75    /// };
76    ///
77    /// let response = client.query_codebase(request).await?;
78    /// for result in response.results {
79    ///     println!("Found in {}: {:.2}", result.file_path, result.score);
80    ///     println!("{}", result.content);
81    /// }
82    /// # Ok(())
83    /// # }
84    /// ```
85    pub async fn query_codebase(&self, request: QueryRequest) -> Result<QueryResponse> {
86        request.validate().map_err(|e| anyhow::anyhow!(e))?;
87
88        // Check if the target path is dirty (if path filter is specified)
89        self.check_path_not_dirty(request.path.as_deref()).await?;
90
91        let start = Instant::now();
92
93        let query_embedding = self
94            .embedding_provider
95            .embed(&request.query)
96            .context("Failed to generate query embedding")?;
97
98        let original_threshold = request.min_score;
99        let mut threshold_used = original_threshold;
100        let mut threshold_lowered = false;
101
102        let mut results = self
103            .vector_db
104            .search(
105                query_embedding.clone(),
106                &request.query,
107                request.limit,
108                threshold_used,
109                request.project.clone(),
110                request.path.clone(),
111                request.hybrid,
112            )
113            .await
114            .context("Failed to search")?;
115
116        if results.is_empty() && original_threshold > 0.3 {
117            let fallback_thresholds = [0.6, 0.5, 0.4, 0.3];
118
119            for &threshold in &fallback_thresholds {
120                if threshold >= original_threshold {
121                    continue;
122                }
123
124                results = self
125                    .vector_db
126                    .search(
127                        query_embedding.clone(),
128                        &request.query,
129                        request.limit,
130                        threshold,
131                        request.project.clone(),
132                        request.path.clone(),
133                        request.hybrid,
134                    )
135                    .await
136                    .context("Failed to search")?;
137
138                if !results.is_empty() {
139                    threshold_used = threshold;
140                    threshold_lowered = true;
141                    break;
142                }
143            }
144        }
145
146        Ok(QueryResponse {
147            results,
148            duration_ms: start.elapsed().as_millis() as u64,
149            threshold_used,
150            threshold_lowered,
151        })
152    }
153
154    /// Advanced search with filters for file type, language, and path patterns
155    pub async fn search_with_filters(
156        &self,
157        request: AdvancedSearchRequest,
158    ) -> Result<QueryResponse> {
159        request.validate().map_err(|e| anyhow::anyhow!(e))?;
160
161        // Check if the target path is dirty (if path filter is specified)
162        self.check_path_not_dirty(request.path.as_deref()).await?;
163
164        let start = Instant::now();
165
166        let query_embedding = self
167            .embedding_provider
168            .embed(&request.query)
169            .context("Failed to generate query embedding")?;
170
171        let original_threshold = request.min_score;
172        let mut threshold_used = original_threshold;
173        let mut threshold_lowered = false;
174
175        let mut results = self
176            .vector_db
177            .search_filtered(
178                query_embedding.clone(),
179                &request.query,
180                request.limit,
181                threshold_used,
182                request.project.clone(),
183                request.path.clone(),
184                true,
185                request.file_extensions.clone(),
186                request.languages.clone(),
187                request.path_patterns.clone(),
188            )
189            .await
190            .context("Failed to search with filters")?;
191
192        // Adaptive threshold lowering if no results found
193        if results.is_empty() && original_threshold > 0.3 {
194            let fallback_thresholds = [0.6, 0.5, 0.4, 0.3];
195
196            for &threshold in &fallback_thresholds {
197                if threshold >= original_threshold {
198                    continue;
199                }
200
201                results = self
202                    .vector_db
203                    .search_filtered(
204                        query_embedding.clone(),
205                        &request.query,
206                        request.limit,
207                        threshold,
208                        request.project.clone(),
209                        request.path.clone(),
210                        true,
211                        request.file_extensions.clone(),
212                        request.languages.clone(),
213                        request.path_patterns.clone(),
214                    )
215                    .await
216                    .context("Failed to search with filters")?;
217
218                if !results.is_empty() {
219                    threshold_used = threshold;
220                    threshold_lowered = true;
221                    break;
222                }
223            }
224        }
225
226        Ok(QueryResponse {
227            results,
228            duration_ms: start.elapsed().as_millis() as u64,
229            threshold_used,
230            threshold_lowered,
231        })
232    }
233
234    /// Get statistics about the indexed codebase
235    pub async fn get_statistics(&self) -> Result<StatisticsResponse> {
236        let stats = self
237            .vector_db
238            .get_statistics()
239            .await
240            .context("Failed to get statistics")?;
241
242        let language_breakdown = stats
243            .language_breakdown
244            .into_iter()
245            .map(|(language, count)| LanguageStats {
246                language,
247                file_count: count,
248                chunk_count: count,
249            })
250            .collect();
251
252        Ok(StatisticsResponse {
253            total_files: stats.total_points,
254            total_chunks: stats.total_vectors,
255            total_embeddings: stats.total_vectors,
256            database_size_bytes: 0,
257            language_breakdown,
258        })
259    }
260
261    /// Clear all indexed data from the vector database and hash cache
262    pub async fn clear_index(&self) -> Result<ClearResponse> {
263        match self.vector_db.clear().await {
264            Ok(_) => {
265                // Clear hash cache (both roots and dirty_roots)
266                let mut cache = self.hash_cache.write().await;
267                cache.roots.clear();
268                cache.dirty_roots.clear();
269
270                // Delete cache file directly for robustness (in case save fails)
271                if self.cache_path.exists() {
272                    if let Err(e) = std::fs::remove_file(&self.cache_path) {
273                        tracing::warn!("Failed to delete hash cache file: {}", e);
274                    } else {
275                        tracing::info!("Deleted hash cache file: {:?}", self.cache_path);
276                    }
277                }
278
279                // Save empty cache (recreates the file with empty state)
280                if let Err(e) = cache.save(&self.cache_path) {
281                    tracing::warn!("Failed to save cleared cache: {}", e);
282                }
283
284                // Also clear git cache
285                let mut git_cache = self.git_cache.write().await;
286                git_cache.repos.clear();
287                if self.git_cache_path.exists() {
288                    if let Err(e) = std::fs::remove_file(&self.git_cache_path) {
289                        tracing::warn!("Failed to delete git cache file: {}", e);
290                    } else {
291                        tracing::info!("Deleted git cache file: {:?}", self.git_cache_path);
292                    }
293                }
294                if let Err(e) = git_cache.save(&self.git_cache_path) {
295                    tracing::warn!("Failed to save cleared git cache: {}", e);
296                }
297
298                if let Err(e) = self
299                    .vector_db
300                    .initialize(self.embedding_provider.dimension())
301                    .await
302                {
303                    Ok(ClearResponse {
304                        success: false,
305                        message: format!("Cleared but failed to reinitialize: {}", e),
306                    })
307                } else {
308                    Ok(ClearResponse {
309                        success: true,
310                        message: "Successfully cleared all indexed data and cache".to_string(),
311                    })
312                }
313            }
314            Err(e) => Ok(ClearResponse {
315                success: false,
316                message: format!("Failed to clear index: {}", e),
317            }),
318        }
319    }
320
321    /// Search git commit history using semantic search
322    ///
323    /// # Example
324    ///
325    /// ```ignore
326    /// use crate::rag::{RagClient, SearchGitHistoryRequest};
327    ///
328    /// # async fn example() -> anyhow::Result<()> {
329    /// let client = RagClient::new().await?;
330    ///
331    /// let request = SearchGitHistoryRequest {
332    ///     query: "bug fix authentication".to_string(),
333    ///     path: "/path/to/repo".to_string(),
334    ///     project: None,
335    ///     branch: None,
336    ///     max_commits: 100,
337    ///     limit: 10,
338    ///     min_score: 0.7,
339    ///     author: None,
340    ///     since: None,
341    ///     until: None,
342    ///     file_pattern: None,
343    /// };
344    ///
345    /// let response = client.search_git_history(request).await?;
346    /// for result in response.results {
347    ///     println!("Commit {}: {}", result.commit_hash, result.commit_message);
348    /// }
349    /// # Ok(())
350    /// # }
351    /// ```
352    pub async fn search_git_history(
353        &self,
354        request: SearchGitHistoryRequest,
355    ) -> Result<SearchGitHistoryResponse> {
356        // Validate request
357        request.validate().map_err(|e| anyhow::anyhow!(e))?;
358
359        // Forward to git indexing implementation
360        super::git_indexing::do_search_git_history(
361            self.embedding_provider.clone(),
362            self.vector_db.clone(),
363            self.git_cache.clone(),
364            &self.git_cache_path,
365            request,
366        )
367        .await
368    }
369}