Skip to main content

brainwires_rag/rag/client/
constructor.rs

1//! Constructor methods and basic utility methods for [`RagClient`].
2
3use super::RagClient;
4use crate::code_analysis::HybridRelationsProvider;
5use crate::rag::cache::HashCache;
6use crate::rag::config::Config;
7use crate::rag::embedding::FastEmbedManager;
8use crate::rag::git_cache::GitCache;
9use crate::rag::indexer::CodeChunker;
10use crate::rag::indexer::FileInfo;
11use crate::rag::indexer::detect_language;
12use brainwires_storage::databases::VectorDatabase;
13
14#[cfg(feature = "qdrant-backend")]
15use brainwires_storage::databases::QdrantDatabase;
16
17#[cfg(not(feature = "qdrant-backend"))]
18use brainwires_storage::databases::LanceDatabase;
19
20use anyhow::{Context, Result};
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24use tokio::sync::RwLock;
25
26impl RagClient {
27    /// Create a new RAG client with default configuration
28    ///
29    /// This will initialize the embedding model, vector database, and load
30    /// any existing caches from disk.
31    ///
32    /// # Errors
33    ///
34    /// Returns an error if:
35    /// - Configuration cannot be loaded
36    /// - Embedding model cannot be initialized
37    /// - Vector database cannot be initialized
38    pub async fn new() -> Result<Self> {
39        let config = Config::new().context("Failed to load configuration")?;
40        Self::with_config(config).await
41    }
42
43    /// Create a new RAG client with custom configuration
44    ///
45    /// # Example
46    ///
47    /// ```ignore
48    /// use crate::rag::{RagClient, Config};
49    ///
50    /// #[tokio::main]
51    /// async fn main() -> anyhow::Result<()> {
52    ///     let mut config = Config::default();
53    ///     config.embedding.model_name = "BAAI/bge-small-en-v1.5".to_string();
54    ///
55    ///     let client = RagClient::with_config(config).await?;
56    ///     Ok(())
57    /// }
58    /// ```
59    pub async fn with_config(config: Config) -> Result<Self> {
60        tracing::info!("Initializing RAG client with configuration");
61        tracing::debug!("Vector DB backend: {}", config.vector_db.backend);
62        tracing::debug!("Embedding model: {}", config.embedding.model_name);
63        tracing::debug!("Chunk size: {}", config.indexing.chunk_size);
64
65        // Initialize embedding provider with configured model
66        let embedding_provider = Arc::new(
67            FastEmbedManager::from_model_name(&config.embedding.model_name)
68                .context("Failed to initialize embedding provider")?,
69        );
70
71        // Initialize the appropriate vector database backend
72        #[cfg(feature = "qdrant-backend")]
73        let vector_db: Arc<dyn VectorDatabase> = {
74            tracing::info!(
75                "Using Qdrant vector database backend at {}",
76                config.vector_db.qdrant_url
77            );
78            Arc::new(
79                QdrantDatabase::with_url(&config.vector_db.qdrant_url)
80                    .await
81                    .context("Failed to initialize Qdrant vector database")?,
82            ) as Arc<dyn VectorDatabase>
83        };
84
85        #[cfg(not(feature = "qdrant-backend"))]
86        let vector_db: Arc<dyn VectorDatabase> = {
87            tracing::info!(
88                "Using LanceDB vector database backend at {}",
89                config.vector_db.lancedb_path.display()
90            );
91            Arc::new(
92                LanceDatabase::new(config.vector_db.lancedb_path.to_string_lossy().into_owned())
93                    .await
94                    .context("Failed to initialize LanceDB vector database")?,
95            ) as Arc<dyn VectorDatabase>
96        };
97
98        // Initialize the database with the embedding dimension
99        vector_db
100            .initialize(embedding_provider.dimension())
101            .await
102            .context("Failed to initialize vector database collections")?;
103
104        // Create chunker with configured chunk size
105        let chunker = Arc::new(CodeChunker::default_strategy());
106
107        // Load persistent hash cache
108        let cache_path = config.cache.hash_cache_path.clone();
109        let hash_cache = HashCache::load(&cache_path).unwrap_or_else(|e| {
110            tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
111            HashCache::default()
112        });
113
114        tracing::info!("Using hash cache file: {:?}", cache_path);
115
116        // Load persistent git cache
117        let git_cache_path = config.cache.git_cache_path.clone();
118        let git_cache = GitCache::load(&git_cache_path).unwrap_or_else(|e| {
119            tracing::warn!("Failed to load git cache: {}, starting fresh", e);
120            GitCache::default()
121        });
122
123        tracing::info!("Using git cache file: {:?}", git_cache_path);
124
125        // Initialize relations provider for code navigation
126        let relations_provider = Arc::new(
127            HybridRelationsProvider::new().context("Failed to initialize relations provider")?,
128        );
129
130        Ok(Self {
131            embedding_provider,
132            vector_db,
133            chunker,
134            hash_cache: Arc::new(RwLock::new(hash_cache)),
135            cache_path,
136            git_cache: Arc::new(RwLock::new(git_cache)),
137            git_cache_path,
138            config: Arc::new(config),
139            indexing_ops: Arc::new(RwLock::new(HashMap::new())),
140            relations_provider,
141        })
142    }
143
144    /// Create a RAG client with an externally-provided vector database.
145    ///
146    /// This enables callers to share a database connection across subsystems
147    /// instead of creating a new one internally.
148    pub async fn with_vector_db(
149        vector_db: Arc<dyn VectorDatabase>,
150        config: Config,
151    ) -> Result<Self> {
152        tracing::info!("Initializing RAG client with externally-provided vector database");
153
154        // Initialize embedding provider with configured model
155        let embedding_provider = Arc::new(
156            FastEmbedManager::from_model_name(&config.embedding.model_name)
157                .context("Failed to initialize embedding provider")?,
158        );
159
160        // Initialize the database with the embedding dimension
161        vector_db
162            .initialize(embedding_provider.dimension())
163            .await
164            .context("Failed to initialize vector database collections")?;
165
166        // Create chunker with configured chunk size
167        let chunker = Arc::new(CodeChunker::default_strategy());
168
169        // Load persistent hash cache
170        let cache_path = config.cache.hash_cache_path.clone();
171        let hash_cache = HashCache::load(&cache_path).unwrap_or_else(|e| {
172            tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
173            HashCache::default()
174        });
175
176        // Load persistent git cache
177        let git_cache_path = config.cache.git_cache_path.clone();
178        let git_cache = GitCache::load(&git_cache_path).unwrap_or_else(|e| {
179            tracing::warn!("Failed to load git cache: {}, starting fresh", e);
180            GitCache::default()
181        });
182
183        // Initialize relations provider for code navigation
184        let relations_provider = Arc::new(
185            HybridRelationsProvider::new().context("Failed to initialize relations provider")?,
186        );
187
188        Ok(Self {
189            embedding_provider,
190            vector_db,
191            chunker,
192            hash_cache: Arc::new(RwLock::new(hash_cache)),
193            cache_path,
194            git_cache: Arc::new(RwLock::new(git_cache)),
195            git_cache_path,
196            config: Arc::new(config),
197            indexing_ops: Arc::new(RwLock::new(HashMap::new())),
198            relations_provider,
199        })
200    }
201
202    /// Create a new client with custom database path (for testing)
203    #[cfg(test)]
204    pub async fn new_with_db_path(db_path: &str, cache_path: PathBuf) -> Result<Self> {
205        // Create a test config with custom paths
206        let mut config = Config::default();
207        config.vector_db.lancedb_path = PathBuf::from(db_path);
208        config.cache.hash_cache_path = cache_path.clone();
209        config.cache.git_cache_path = cache_path.parent().unwrap().join("git_cache.json");
210
211        Self::with_config(config).await
212    }
213
214    /// Create FileInfo from a file path for relations analysis
215    pub(crate) fn create_file_info(
216        &self,
217        file_path: &str,
218        project: Option<String>,
219    ) -> Result<FileInfo> {
220        use std::path::Path;
221
222        let path = Path::new(file_path);
223        let canonical = std::fs::canonicalize(path)
224            .with_context(|| format!("Failed to canonicalize path: {}", file_path))?;
225
226        let content = std::fs::read_to_string(&canonical)
227            .with_context(|| format!("Failed to read file: {}", file_path))?;
228
229        let extension = canonical
230            .extension()
231            .and_then(|e| e.to_str())
232            .map(|s| s.to_string());
233
234        let language = extension.as_ref().and_then(|ext| detect_language(ext));
235
236        // Compute file hash
237        use sha2::{Digest, Sha256};
238        let mut hasher = Sha256::new();
239        hasher.update(content.as_bytes());
240        let hash = format!("{:x}", hasher.finalize());
241
242        // Determine root path (parent directory)
243        let root_path = canonical
244            .parent()
245            .map(|p| p.to_string_lossy().to_string())
246            .unwrap_or_else(|| "/".to_string());
247
248        let relative_path = canonical
249            .file_name()
250            .map(|n| n.to_string_lossy().to_string())
251            .unwrap_or_else(|| file_path.to_string());
252
253        Ok(FileInfo {
254            path: canonical,
255            relative_path,
256            root_path,
257            project,
258            extension,
259            language,
260            content,
261            hash,
262        })
263    }
264
265    /// Normalize a path to a canonical absolute form for consistent cache lookups
266    pub fn normalize_path(path: &str) -> Result<String> {
267        let path_buf = PathBuf::from(path);
268        let canonical = std::fs::canonicalize(&path_buf)
269            .with_context(|| format!("Failed to canonicalize path: {}", path))?;
270        Ok(canonical.to_string_lossy().to_string())
271    }
272
273    /// Check if a specific path's index is dirty (incomplete/corrupted)
274    ///
275    /// Returns true if the path is marked as dirty, meaning a previous indexing
276    /// operation was interrupted and the data may be inconsistent.
277    pub async fn is_index_dirty(&self, path: &str) -> bool {
278        if let Ok(normalized) = Self::normalize_path(path) {
279            let cache = self.hash_cache.read().await;
280            cache.is_dirty(&normalized)
281        } else {
282            false
283        }
284    }
285
286    /// Check if any indexed paths are dirty
287    ///
288    /// Returns a list of paths that have dirty indexes.
289    pub async fn get_dirty_paths(&self) -> Vec<String> {
290        let cache = self.hash_cache.read().await;
291        cache.get_dirty_roots().keys().cloned().collect()
292    }
293
294    /// Check if searching on a specific path should be blocked due to dirty state
295    ///
296    /// Returns an error if the path is dirty, otherwise Ok(())
297    pub(crate) async fn check_path_not_dirty(&self, path: Option<&str>) -> Result<()> {
298        if let Some(p) = path
299            && self.is_index_dirty(p).await
300        {
301            anyhow::bail!(
302                "Index for '{}' is dirty (previous indexing was interrupted). \
303                    Please re-run index_codebase to rebuild the index before querying.",
304                p
305            );
306        }
307        Ok(())
308    }
309
310    /// Get the configuration used by this client
311    pub fn config(&self) -> &Config {
312        &self.config
313    }
314
315    /// Get the embedding dimension used by this client
316    pub fn embedding_dimension(&self) -> usize {
317        self.embedding_provider.dimension()
318    }
319}