use super::RagClient;
#[cfg(feature = "code-analysis")]
use crate::code_analysis::HybridRelationsProvider;
use crate::rag::cache::HashCache;
use crate::rag::config::Config;
use crate::rag::embedding::{EmbeddingProvider, FastEmbedManager};
use crate::rag::git_cache::GitCache;
use crate::rag::indexer::CodeChunker;
#[cfg(feature = "code-analysis")]
use crate::rag::indexer::FileInfo;
#[cfg(feature = "code-analysis")]
use crate::rag::indexer::detect_language;
use brainwires_storage::databases::VectorDatabase;
#[cfg(feature = "qdrant-backend")]
use brainwires_storage::databases::QdrantDatabase;
#[cfg(not(feature = "qdrant-backend"))]
use brainwires_storage::databases::LanceDatabase;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
impl RagClient {
/// Creates a client from the configuration returned by [`Config::new`].
///
/// This is a thin convenience wrapper: it loads the configuration and hands it
/// to [`RagClient::with_config`].
///
/// # Errors
///
/// Returns an error (with the context "Failed to load configuration") when
/// `Config::new` fails, or whatever error `with_config` produces.
pub async fn new() -> Result<Self> {
    Self::with_config(Config::new().context("Failed to load configuration")?).await
}
/// Builds a client from an already-loaded [`Config`].
///
/// The steps run in this order:
/// 1. create the embedding provider named by `config.embedding.model_name`;
/// 2. open the vector database chosen at compile time (Qdrant when the
///    `qdrant-backend` feature is enabled, LanceDB otherwise);
/// 3. initialize the database with the provider's embedding dimension;
/// 4. create the default chunker and load the hash and git caches from the
///    paths in `config.cache`;
/// 5. with the `code-analysis` feature, create the relations provider.
///
/// # Errors
///
/// Fails if the embedding provider, the vector database, the database
/// initialization, or (with `code-analysis`) the relations provider cannot be
/// set up. A cache that fails to load is *not* an error: a warning is logged
/// and a default cache is used instead.
pub async fn with_config(config: Config) -> Result<Self> {
    tracing::info!("Initializing RAG client with configuration");
    tracing::debug!("Vector DB backend: {}", config.vector_db.backend);
    tracing::debug!("Embedding model: {}", config.embedding.model_name);
    tracing::debug!("Chunk size: {}", config.indexing.chunk_size);

    let embedder = FastEmbedManager::from_model_name(&config.embedding.model_name)
        .context("Failed to initialize embedding provider")?;
    let embedding_provider = Arc::new(embedder);

    // Exactly one of the two `vector_db` bindings below survives compilation.
    #[cfg(feature = "qdrant-backend")]
    let vector_db: Arc<dyn VectorDatabase> = {
        tracing::info!(
            "Using Qdrant vector database backend at {}",
            config.vector_db.qdrant_url
        );
        let qdrant = QdrantDatabase::with_url(&config.vector_db.qdrant_url)
            .await
            .context("Failed to initialize Qdrant vector database")?;
        Arc::new(qdrant) as Arc<dyn VectorDatabase>
    };
    #[cfg(not(feature = "qdrant-backend"))]
    let vector_db: Arc<dyn VectorDatabase> = {
        tracing::info!(
            "Using LanceDB vector database backend at {}",
            config.vector_db.lancedb_path.display()
        );
        let location = config.vector_db.lancedb_path.to_string_lossy().into_owned();
        let lance = LanceDatabase::new(location)
            .await
            .context("Failed to initialize LanceDB vector database")?;
        Arc::new(lance) as Arc<dyn VectorDatabase>
    };

    let dimension = embedding_provider.dimension();
    vector_db
        .initialize(dimension)
        .await
        .context("Failed to initialize vector database collections")?;

    let chunker = Arc::new(CodeChunker::default_strategy());

    // A cache that cannot be loaded is replaced by a default one rather than aborting startup.
    let cache_path = config.cache.hash_cache_path.clone();
    let hash_cache = match HashCache::load(&cache_path) {
        Ok(loaded) => loaded,
        Err(e) => {
            tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
            HashCache::default()
        }
    };
    tracing::info!("Using hash cache file: {:?}", cache_path);

    let git_cache_path = config.cache.git_cache_path.clone();
    let git_cache = match GitCache::load(&git_cache_path) {
        Ok(loaded) => loaded,
        Err(e) => {
            tracing::warn!("Failed to load git cache: {}, starting fresh", e);
            GitCache::default()
        }
    };
    tracing::info!("Using git cache file: {:?}", git_cache_path);

    #[cfg(feature = "code-analysis")]
    let relations = HybridRelationsProvider::new(false)
        .context("Failed to initialize relations provider")?;

    Ok(Self {
        embedding_provider,
        vector_db,
        chunker,
        hash_cache: Arc::new(RwLock::new(hash_cache)),
        cache_path,
        git_cache: Arc::new(RwLock::new(git_cache)),
        git_cache_path,
        config: Arc::new(config),
        indexing_ops: Arc::new(RwLock::new(HashMap::new())),
        #[cfg(feature = "code-analysis")]
        relations_provider: Arc::new(relations),
    })
}
/// Builds a client around a caller-supplied vector database.
///
/// Unlike [`RagClient::with_config`], no database is opened here: the given
/// `vector_db` is used as-is and only initialized with the embedding
/// dimension of the provider named by `config.embedding.model_name`. The
/// chunker, hash cache, git cache and (with the `code-analysis` feature) the
/// relations provider are created the same way as in `with_config`.
///
/// # Errors
///
/// Fails if the embedding provider, the database initialization, or (with
/// `code-analysis`) the relations provider cannot be set up. A cache that
/// fails to load only produces a warning and is replaced by a default cache.
pub async fn with_vector_db(
    vector_db: Arc<dyn VectorDatabase>,
    config: Config,
) -> Result<Self> {
    tracing::info!("Initializing RAG client with externally-provided vector database");

    let embedder = FastEmbedManager::from_model_name(&config.embedding.model_name)
        .context("Failed to initialize embedding provider")?;
    let embedding_provider = Arc::new(embedder);

    let dimension = embedding_provider.dimension();
    vector_db
        .initialize(dimension)
        .await
        .context("Failed to initialize vector database collections")?;

    let chunker = Arc::new(CodeChunker::default_strategy());

    // Unreadable caches are replaced by defaults so construction can continue.
    let cache_path = config.cache.hash_cache_path.clone();
    let hash_cache = match HashCache::load(&cache_path) {
        Ok(loaded) => loaded,
        Err(e) => {
            tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
            HashCache::default()
        }
    };

    let git_cache_path = config.cache.git_cache_path.clone();
    let git_cache = match GitCache::load(&git_cache_path) {
        Ok(loaded) => loaded,
        Err(e) => {
            tracing::warn!("Failed to load git cache: {}, starting fresh", e);
            GitCache::default()
        }
    };

    #[cfg(feature = "code-analysis")]
    let relations = HybridRelationsProvider::new(false)
        .context("Failed to initialize relations provider")?;

    Ok(Self {
        embedding_provider,
        vector_db,
        chunker,
        hash_cache: Arc::new(RwLock::new(hash_cache)),
        cache_path,
        git_cache: Arc::new(RwLock::new(git_cache)),
        git_cache_path,
        config: Arc::new(config),
        indexing_ops: Arc::new(RwLock::new(HashMap::new())),
        #[cfg(feature = "code-analysis")]
        relations_provider: Arc::new(relations),
    })
}
/// Test-only constructor that points the client at explicit storage locations.
///
/// Starts from `Config::default()`, then overrides:
/// - `vector_db.lancedb_path` with `db_path`;
/// - `cache.hash_cache_path` with `cache_path`;
/// - `cache.git_cache_path` with `git_cache.json` placed in the same directory
///   as `cache_path`.
///
/// The resulting configuration is passed to [`RagClient::with_config`].
///
/// # Errors
///
/// Returns an error when `cache_path` has no parent directory (for example an
/// empty path or a filesystem root), or when `with_config` fails.
#[cfg(test)]
pub async fn new_with_db_path(db_path: &str, cache_path: PathBuf) -> Result<Self> {
    let mut config = Config::default();
    config.vector_db.lancedb_path = PathBuf::from(db_path);
    // The git cache is stored next to the hash cache; report a parentless path
    // as an error instead of panicking, since this function already returns `Result`.
    config.cache.git_cache_path = cache_path
        .parent()
        .with_context(|| {
            format!(
                "Cache path '{}' has no parent directory",
                cache_path.display()
            )
        })?
        .join("git_cache.json");
    config.cache.hash_cache_path = cache_path;
    Self::with_config(config).await
}
/// Reads a single file from disk and assembles its [`FileInfo`].
///
/// The path is canonicalized first and the file is read as UTF-8 text. The
/// returned record contains:
/// - `path`: the canonical path;
/// - `root_path`: the canonical path's parent directory (or `"/"` if there is none);
/// - `relative_path`: the file name only (or `file_path` verbatim if there is none);
/// - `extension` / `language`: the extension, when it is valid UTF-8, and the
///   result of `detect_language` for it;
/// - `content` and `hash`: the file text and its lowercase-hex SHA-256 digest;
/// - `project`: passed through unchanged.
///
/// # Errors
///
/// Fails when the path cannot be canonicalized or the file cannot be read as a
/// string.
#[cfg(feature = "code-analysis")]
pub(crate) fn create_file_info(
    &self,
    file_path: &str,
    project: Option<String>,
) -> Result<FileInfo> {
    use sha2::{Digest, Sha256};
    use std::path::Path;

    let canonical = std::fs::canonicalize(Path::new(file_path))
        .with_context(|| format!("Failed to canonicalize path: {}", file_path))?;
    let content = std::fs::read_to_string(&canonical)
        .with_context(|| format!("Failed to read file: {}", file_path))?;

    let extension = canonical
        .extension()
        .and_then(|os_ext| os_ext.to_str())
        .map(String::from);
    let language = match extension.as_ref() {
        Some(ext) => detect_language(ext),
        None => None,
    };

    // One-shot digest of the whole file text, rendered as lowercase hex.
    let hash = format!("{:x}", Sha256::digest(content.as_bytes()));

    let root_path = match canonical.parent() {
        Some(dir) => dir.to_string_lossy().into_owned(),
        None => "/".to_string(),
    };
    let relative_path = match canonical.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => file_path.to_string(),
    };

    Ok(FileInfo {
        path: canonical,
        relative_path,
        root_path,
        project,
        extension,
        language,
        content,
        hash,
    })
}
pub fn normalize_path(path: &str) -> Result<String> {
let path_buf = PathBuf::from(path);
let canonical = std::fs::canonicalize(&path_buf)
.with_context(|| format!("Failed to canonicalize path: {}", path))?;
Ok(canonical.to_string_lossy().to_string())
}
/// Reports whether the hash cache marks the index for `path` as dirty.
///
/// The path is normalized with [`RagClient::normalize_path`] before the
/// lookup. A path that cannot be normalized is reported as not dirty (`false`).
pub async fn is_index_dirty(&self, path: &str) -> bool {
    let Ok(normalized) = Self::normalize_path(path) else {
        return false;
    };
    let guard = self.hash_cache.read().await;
    guard.is_dirty(&normalized)
}
/// Returns a copy of every key in the hash cache's dirty-roots collection.
///
/// The hash cache is read-locked only for the duration of the call.
pub async fn get_dirty_paths(&self) -> Vec<String> {
    let guard = self.hash_cache.read().await;
    let dirty_roots = guard.get_dirty_roots();
    dirty_roots.keys().cloned().collect()
}
/// Guard that rejects an operation when the index for `path` is dirty.
///
/// `None` always passes. For `Some(path)`, the check delegates to
/// [`RagClient::is_index_dirty`].
///
/// # Errors
///
/// Returns an error asking the caller to re-run `index_codebase` when the
/// index for the given path is dirty.
pub(crate) async fn check_path_not_dirty(&self, path: Option<&str>) -> Result<()> {
    let Some(p) = path else {
        return Ok(());
    };
    if self.is_index_dirty(p).await {
        anyhow::bail!(
            "Index for '{}' is dirty (previous indexing was interrupted). \
             Please re-run index_codebase to rebuild the index before querying.",
            p
        );
    }
    Ok(())
}
/// Returns a shared reference to the configuration this client was built with.
pub fn config(&self) -> &Config {
    self.config.as_ref()
}
/// Returns the dimension reported by the client's embedding provider.
///
/// This is the same value the constructors pass to the vector database's
/// `initialize` call.
pub fn embedding_dimension(&self) -> usize {
    let provider = &self.embedding_provider;
    provider.dimension()
}
}