brainwires_rag/rag/client/
constructor.rs1use super::RagClient;
4use crate::code_analysis::HybridRelationsProvider;
5use crate::rag::cache::HashCache;
6use crate::rag::config::Config;
7use crate::rag::embedding::FastEmbedManager;
8use crate::rag::git_cache::GitCache;
9use crate::rag::indexer::CodeChunker;
10use crate::rag::indexer::FileInfo;
11use crate::rag::indexer::detect_language;
12use brainwires_storage::databases::VectorDatabase;
13
14#[cfg(feature = "qdrant-backend")]
15use brainwires_storage::databases::QdrantDatabase;
16
17#[cfg(not(feature = "qdrant-backend"))]
18use brainwires_storage::databases::LanceDatabase;
19
20use anyhow::{Context, Result};
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24use tokio::sync::RwLock;
25
26impl RagClient {
27 pub async fn new() -> Result<Self> {
39 let config = Config::new().context("Failed to load configuration")?;
40 Self::with_config(config).await
41 }
42
43 pub async fn with_config(config: Config) -> Result<Self> {
60 tracing::info!("Initializing RAG client with configuration");
61 tracing::debug!("Vector DB backend: {}", config.vector_db.backend);
62 tracing::debug!("Embedding model: {}", config.embedding.model_name);
63 tracing::debug!("Chunk size: {}", config.indexing.chunk_size);
64
65 let embedding_provider = Arc::new(
67 FastEmbedManager::from_model_name(&config.embedding.model_name)
68 .context("Failed to initialize embedding provider")?,
69 );
70
71 #[cfg(feature = "qdrant-backend")]
73 let vector_db: Arc<dyn VectorDatabase> = {
74 tracing::info!(
75 "Using Qdrant vector database backend at {}",
76 config.vector_db.qdrant_url
77 );
78 Arc::new(
79 QdrantDatabase::with_url(&config.vector_db.qdrant_url)
80 .await
81 .context("Failed to initialize Qdrant vector database")?,
82 ) as Arc<dyn VectorDatabase>
83 };
84
85 #[cfg(not(feature = "qdrant-backend"))]
86 let vector_db: Arc<dyn VectorDatabase> = {
87 tracing::info!(
88 "Using LanceDB vector database backend at {}",
89 config.vector_db.lancedb_path.display()
90 );
91 Arc::new(
92 LanceDatabase::new(config.vector_db.lancedb_path.to_string_lossy().into_owned())
93 .await
94 .context("Failed to initialize LanceDB vector database")?,
95 ) as Arc<dyn VectorDatabase>
96 };
97
98 vector_db
100 .initialize(embedding_provider.dimension())
101 .await
102 .context("Failed to initialize vector database collections")?;
103
104 let chunker = Arc::new(CodeChunker::default_strategy());
106
107 let cache_path = config.cache.hash_cache_path.clone();
109 let hash_cache = HashCache::load(&cache_path).unwrap_or_else(|e| {
110 tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
111 HashCache::default()
112 });
113
114 tracing::info!("Using hash cache file: {:?}", cache_path);
115
116 let git_cache_path = config.cache.git_cache_path.clone();
118 let git_cache = GitCache::load(&git_cache_path).unwrap_or_else(|e| {
119 tracing::warn!("Failed to load git cache: {}, starting fresh", e);
120 GitCache::default()
121 });
122
123 tracing::info!("Using git cache file: {:?}", git_cache_path);
124
125 let relations_provider = Arc::new(
127 HybridRelationsProvider::new().context("Failed to initialize relations provider")?,
128 );
129
130 Ok(Self {
131 embedding_provider,
132 vector_db,
133 chunker,
134 hash_cache: Arc::new(RwLock::new(hash_cache)),
135 cache_path,
136 git_cache: Arc::new(RwLock::new(git_cache)),
137 git_cache_path,
138 config: Arc::new(config),
139 indexing_ops: Arc::new(RwLock::new(HashMap::new())),
140 relations_provider,
141 })
142 }
143
144 pub async fn with_vector_db(
149 vector_db: Arc<dyn VectorDatabase>,
150 config: Config,
151 ) -> Result<Self> {
152 tracing::info!("Initializing RAG client with externally-provided vector database");
153
154 let embedding_provider = Arc::new(
156 FastEmbedManager::from_model_name(&config.embedding.model_name)
157 .context("Failed to initialize embedding provider")?,
158 );
159
160 vector_db
162 .initialize(embedding_provider.dimension())
163 .await
164 .context("Failed to initialize vector database collections")?;
165
166 let chunker = Arc::new(CodeChunker::default_strategy());
168
169 let cache_path = config.cache.hash_cache_path.clone();
171 let hash_cache = HashCache::load(&cache_path).unwrap_or_else(|e| {
172 tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
173 HashCache::default()
174 });
175
176 let git_cache_path = config.cache.git_cache_path.clone();
178 let git_cache = GitCache::load(&git_cache_path).unwrap_or_else(|e| {
179 tracing::warn!("Failed to load git cache: {}, starting fresh", e);
180 GitCache::default()
181 });
182
183 let relations_provider = Arc::new(
185 HybridRelationsProvider::new().context("Failed to initialize relations provider")?,
186 );
187
188 Ok(Self {
189 embedding_provider,
190 vector_db,
191 chunker,
192 hash_cache: Arc::new(RwLock::new(hash_cache)),
193 cache_path,
194 git_cache: Arc::new(RwLock::new(git_cache)),
195 git_cache_path,
196 config: Arc::new(config),
197 indexing_ops: Arc::new(RwLock::new(HashMap::new())),
198 relations_provider,
199 })
200 }
201
/// Test-only constructor that points the client at explicit storage paths.
///
/// Starts from `Config::default()`, then overrides:
/// - `vector_db.lancedb_path` with `db_path`,
/// - `cache.hash_cache_path` with `cache_path`,
/// - `cache.git_cache_path` with `git_cache.json` placed next to `cache_path`.
///
/// # Errors
///
/// Returns an error if `cache_path` has no parent directory (so the git
/// cache location cannot be derived), or if [`RagClient::with_config`] fails.
#[cfg(test)]
pub async fn new_with_db_path(db_path: &str, cache_path: PathBuf) -> Result<Self> {
    // Derive the sibling path first so `cache_path` can be moved into the
    // config afterwards without cloning. A parentless path is reported as an
    // error instead of panicking.
    let git_cache_path = cache_path
        .parent()
        .with_context(|| {
            format!(
                "Cache path has no parent directory: {}",
                cache_path.display()
            )
        })?
        .join("git_cache.json");

    let mut config = Config::default();
    config.vector_db.lancedb_path = PathBuf::from(db_path);
    config.cache.hash_cache_path = cache_path;
    config.cache.git_cache_path = git_cache_path;

    Self::with_config(config).await
}
213
214 pub(crate) fn create_file_info(
216 &self,
217 file_path: &str,
218 project: Option<String>,
219 ) -> Result<FileInfo> {
220 use std::path::Path;
221
222 let path = Path::new(file_path);
223 let canonical = std::fs::canonicalize(path)
224 .with_context(|| format!("Failed to canonicalize path: {}", file_path))?;
225
226 let content = std::fs::read_to_string(&canonical)
227 .with_context(|| format!("Failed to read file: {}", file_path))?;
228
229 let extension = canonical
230 .extension()
231 .and_then(|e| e.to_str())
232 .map(|s| s.to_string());
233
234 let language = extension.as_ref().and_then(|ext| detect_language(ext));
235
236 use sha2::{Digest, Sha256};
238 let mut hasher = Sha256::new();
239 hasher.update(content.as_bytes());
240 let hash = format!("{:x}", hasher.finalize());
241
242 let root_path = canonical
244 .parent()
245 .map(|p| p.to_string_lossy().to_string())
246 .unwrap_or_else(|| "/".to_string());
247
248 let relative_path = canonical
249 .file_name()
250 .map(|n| n.to_string_lossy().to_string())
251 .unwrap_or_else(|| file_path.to_string());
252
253 Ok(FileInfo {
254 path: canonical,
255 relative_path,
256 root_path,
257 project,
258 extension,
259 language,
260 content,
261 hash,
262 })
263 }
264
265 pub fn normalize_path(path: &str) -> Result<String> {
267 let path_buf = PathBuf::from(path);
268 let canonical = std::fs::canonicalize(&path_buf)
269 .with_context(|| format!("Failed to canonicalize path: {}", path))?;
270 Ok(canonical.to_string_lossy().to_string())
271 }
272
273 pub async fn is_index_dirty(&self, path: &str) -> bool {
278 if let Ok(normalized) = Self::normalize_path(path) {
279 let cache = self.hash_cache.read().await;
280 cache.is_dirty(&normalized)
281 } else {
282 false
283 }
284 }
285
286 pub async fn get_dirty_paths(&self) -> Vec<String> {
290 let cache = self.hash_cache.read().await;
291 cache.get_dirty_roots().keys().cloned().collect()
292 }
293
294 pub(crate) async fn check_path_not_dirty(&self, path: Option<&str>) -> Result<()> {
298 if let Some(p) = path
299 && self.is_index_dirty(p).await
300 {
301 anyhow::bail!(
302 "Index for '{}' is dirty (previous indexing was interrupted). \
303 Please re-run index_codebase to rebuild the index before querying.",
304 p
305 );
306 }
307 Ok(())
308 }
309
310 pub fn config(&self) -> &Config {
312 &self.config
313 }
314
315 pub fn embedding_dimension(&self) -> usize {
317 self.embedding_provider.dimension()
318 }
319}