project_rag/client/
mod.rs

1//! Core library client for project-rag
2//!
3//! This module provides the main client interface for using project-rag
4//! as a library in your own Rust applications.
5
6use crate::cache::HashCache;
7use crate::config::Config;
8use crate::embedding::{EmbeddingProvider, FastEmbedManager};
9use crate::git_cache::GitCache;
10use crate::indexer::{CodeChunker, FileInfo, detect_language};
11use crate::relations::{
12    DefinitionResult, HybridRelationsProvider, ReferenceResult, RelationsProvider,
13};
14use crate::types::*;
15use crate::vector_db::VectorDatabase;
16
17// Conditionally import the appropriate vector database backend
18#[cfg(feature = "qdrant-backend")]
19use crate::vector_db::QdrantVectorDB;
20
21#[cfg(not(feature = "qdrant-backend"))]
22use crate::vector_db::LanceVectorDB;
23
24use anyhow::{Context, Result};
25use std::collections::HashMap;
26use std::path::PathBuf;
27use std::sync::Arc;
28use std::time::Instant;
29use tokio::sync::RwLock;
30use tokio::sync::broadcast;
31
32// Filesystem locking for cross-process coordination
33mod fs_lock;
34pub(crate) use fs_lock::FsLockGuard;
35
36// Index locking mechanism (uses fs_lock for cross-process, broadcast for in-process)
37mod index_lock;
38pub(crate) use index_lock::{IndexLockGuard, IndexLockResult, IndexingOperation};
39
40/// Main client for interacting with the RAG system
41///
42/// This client provides a high-level API for indexing codebases and performing
43/// semantic searches. It contains all the core functionality and can be used
44/// directly as a library or wrapped by the MCP server.
45///
46/// # Example
47///
48/// ```no_run
49/// use project_rag::{RagClient, IndexRequest, QueryRequest};
50///
51/// #[tokio::main]
52/// async fn main() -> anyhow::Result<()> {
53///     // Create client with default configuration
54///     let client = RagClient::new().await?;
55///
56///     // Index a codebase
57///     let index_req = IndexRequest {
58///         path: "/path/to/code".to_string(),
59///         project: Some("my-project".to_string()),
60///         include_patterns: vec!["**/*.rs".to_string()],
61///         exclude_patterns: vec!["**/target/**".to_string()],
62///         max_file_size: 1_048_576,
63///     };
64///     let response = client.index_codebase(index_req).await?;
65///     println!("Indexed {} files", response.files_indexed);
66///
67///     Ok(())
68/// }
69/// ```
/// Cheap to clone: every field is behind an `Arc`, so clones share all state.
#[derive(Clone)]
pub struct RagClient {
    // Generates embeddings for both indexing and query text.
    pub(crate) embedding_provider: Arc<FastEmbedManager>,
    // Vector store backend, selected at compile time by the
    // `qdrant-backend` cargo feature (LanceDB otherwise).
    #[cfg(feature = "qdrant-backend")]
    pub(crate) vector_db: Arc<QdrantVectorDB>,
    #[cfg(not(feature = "qdrant-backend"))]
    pub(crate) vector_db: Arc<LanceVectorDB>,
    // Splits source files into chunks prior to embedding.
    pub(crate) chunker: Arc<CodeChunker>,
    // Persistent hash cache for incremental updates (per-file change detection).
    pub(crate) hash_cache: Arc<RwLock<HashCache>>,
    // On-disk location the hash cache is loaded from and saved to.
    pub(crate) cache_path: PathBuf,
    // Git cache for git history indexing.
    pub(crate) git_cache: Arc<RwLock<GitCache>>,
    // On-disk location the git cache is loaded from and saved to.
    pub(crate) git_cache_path: PathBuf,
    // Configuration (for accessing batch sizes, timeouts, etc.)
    pub(crate) config: Arc<Config>,
    // In-progress indexing operations keyed by normalized root path
    // (prevents concurrent indexing and allows result sharing between waiters).
    pub(crate) indexing_ops: Arc<RwLock<HashMap<String, IndexingOperation>>>,
    // Relations provider for code navigation (find definition, references, call graph).
    pub(crate) relations_provider: Arc<HybridRelationsProvider>,
}
91
92impl RagClient {
93    /// Create a new RAG client with default configuration
94    ///
95    /// This will initialize the embedding model, vector database, and load
96    /// any existing caches from disk.
97    ///
98    /// # Errors
99    ///
100    /// Returns an error if:
101    /// - Configuration cannot be loaded
102    /// - Embedding model cannot be initialized
103    /// - Vector database cannot be initialized
104    pub async fn new() -> Result<Self> {
105        let config = Config::new().context("Failed to load configuration")?;
106        Self::with_config(config).await
107    }
108
109    /// Create a new RAG client with custom configuration
110    ///
111    /// # Example
112    ///
113    /// ```no_run
114    /// use project_rag::{RagClient, Config};
115    ///
116    /// #[tokio::main]
117    /// async fn main() -> anyhow::Result<()> {
118    ///     let mut config = Config::default();
119    ///     config.embedding.model_name = "BAAI/bge-small-en-v1.5".to_string();
120    ///
121    ///     let client = RagClient::with_config(config).await?;
122    ///     Ok(())
123    /// }
124    /// ```
    pub async fn with_config(config: Config) -> Result<Self> {
        tracing::info!("Initializing RAG client with configuration");
        tracing::debug!("Vector DB backend: {}", config.vector_db.backend);
        tracing::debug!("Embedding model: {}", config.embedding.model_name);
        tracing::debug!("Chunk size: {}", config.indexing.chunk_size);

        // Initialize embedding provider with configured model
        let embedding_provider = Arc::new(
            FastEmbedManager::from_model_name(&config.embedding.model_name)
                .context("Failed to initialize embedding provider")?,
        );

        // Initialize the appropriate vector database backend
        // (selected at compile time by the `qdrant-backend` cargo feature)
        #[cfg(feature = "qdrant-backend")]
        let vector_db = {
            tracing::info!(
                "Using Qdrant vector database backend at {}",
                config.vector_db.qdrant_url
            );
            Arc::new(
                QdrantVectorDB::with_url(&config.vector_db.qdrant_url)
                    .await
                    .context("Failed to initialize Qdrant vector database")?,
            )
        };

        #[cfg(not(feature = "qdrant-backend"))]
        let vector_db = {
            tracing::info!(
                "Using LanceDB vector database backend at {}",
                config.vector_db.lancedb_path.display()
            );
            Arc::new(
                LanceVectorDB::with_path(&config.vector_db.lancedb_path.to_string_lossy())
                    .await
                    .context("Failed to initialize LanceDB vector database")?,
            )
        };

        // Initialize the database collections sized to the embedding
        // dimension reported by the model
        vector_db
            .initialize(embedding_provider.dimension())
            .await
            .context("Failed to initialize vector database collections")?;

        // Create the code chunker.
        // NOTE(review): `config.indexing.chunk_size` is logged above but is NOT
        // passed here — `default_strategy()` takes no arguments. Confirm whether
        // the configured chunk size should be wired through.
        let chunker = Arc::new(CodeChunker::default_strategy());

        // Load persistent hash cache; a missing/corrupt cache is non-fatal and
        // simply starts empty (presumably forcing a full re-index next run).
        let cache_path = config.cache.hash_cache_path.clone();
        let hash_cache = HashCache::load(&cache_path).unwrap_or_else(|e| {
            tracing::warn!("Failed to load hash cache: {}, starting fresh", e);
            HashCache::default()
        });

        tracing::info!("Using hash cache file: {:?}", cache_path);

        // Load persistent git cache (same best-effort policy as the hash cache)
        let git_cache_path = config.cache.git_cache_path.clone();
        let git_cache = GitCache::load(&git_cache_path).unwrap_or_else(|e| {
            tracing::warn!("Failed to load git cache: {}, starting fresh", e);
            GitCache::default()
        });

        tracing::info!("Using git cache file: {:?}", git_cache_path);

        // Initialize relations provider for code navigation
        let relations_provider = Arc::new(
            HybridRelationsProvider::new(false) // stack-graphs disabled by default
                .context("Failed to initialize relations provider")?,
        );

        Ok(Self {
            embedding_provider,
            vector_db,
            chunker,
            hash_cache: Arc::new(RwLock::new(hash_cache)),
            cache_path,
            git_cache: Arc::new(RwLock::new(git_cache)),
            git_cache_path,
            config: Arc::new(config),
            indexing_ops: Arc::new(RwLock::new(HashMap::new())),
            relations_provider,
        })
    }
210
211    /// Create a new client with custom database path (for testing)
212    #[cfg(test)]
213    pub async fn new_with_db_path(db_path: &str, cache_path: PathBuf) -> Result<Self> {
214        // Create a test config with custom paths
215        let mut config = Config::default();
216        config.vector_db.lancedb_path = PathBuf::from(db_path);
217        config.cache.hash_cache_path = cache_path.clone();
218        config.cache.git_cache_path = cache_path.parent().unwrap().join("git_cache.json");
219
220        Self::with_config(config).await
221    }
222
223    /// Create FileInfo from a file path for relations analysis
224    fn create_file_info(&self, file_path: &str, project: Option<String>) -> Result<FileInfo> {
225        use std::path::Path;
226
227        let path = Path::new(file_path);
228        let canonical = std::fs::canonicalize(path)
229            .with_context(|| format!("Failed to canonicalize path: {}", file_path))?;
230
231        let content = std::fs::read_to_string(&canonical)
232            .with_context(|| format!("Failed to read file: {}", file_path))?;
233
234        let extension = canonical
235            .extension()
236            .and_then(|e| e.to_str())
237            .map(|s| s.to_string());
238
239        let language = extension.as_ref().and_then(|ext| {
240            detect_language(ext)
241        });
242
243        // Compute file hash
244        use sha2::{Sha256, Digest};
245        let mut hasher = Sha256::new();
246        hasher.update(content.as_bytes());
247        let hash = format!("{:x}", hasher.finalize());
248
249        // Determine root path (parent directory)
250        let root_path = canonical
251            .parent()
252            .map(|p| p.to_string_lossy().to_string())
253            .unwrap_or_else(|| "/".to_string());
254
255        let relative_path = canonical
256            .file_name()
257            .map(|n| n.to_string_lossy().to_string())
258            .unwrap_or_else(|| file_path.to_string());
259
260        Ok(FileInfo {
261            path: canonical,
262            relative_path,
263            root_path,
264            project,
265            extension,
266            language,
267            content,
268            hash,
269        })
270    }
271
272    /// Normalize a path to a canonical absolute form for consistent cache lookups
273    pub fn normalize_path(path: &str) -> Result<String> {
274        let path_buf = PathBuf::from(path);
275        let canonical = std::fs::canonicalize(&path_buf)
276            .with_context(|| format!("Failed to canonicalize path: {}", path))?;
277        Ok(canonical.to_string_lossy().to_string())
278    }
279
280    /// Check if a specific path's index is dirty (incomplete/corrupted)
281    ///
282    /// Returns true if the path is marked as dirty, meaning a previous indexing
283    /// operation was interrupted and the data may be inconsistent.
284    pub async fn is_index_dirty(&self, path: &str) -> bool {
285        if let Ok(normalized) = Self::normalize_path(path) {
286            let cache = self.hash_cache.read().await;
287            cache.is_dirty(&normalized)
288        } else {
289            false
290        }
291    }
292
293    /// Check if any indexed paths are dirty
294    ///
295    /// Returns a list of paths that have dirty indexes.
296    pub async fn get_dirty_paths(&self) -> Vec<String> {
297        let cache = self.hash_cache.read().await;
298        cache.get_dirty_roots().keys().cloned().collect()
299    }
300
301    /// Check if searching on a specific path should be blocked due to dirty state
302    ///
303    /// Returns an error if the path is dirty, otherwise Ok(())
304    async fn check_path_not_dirty(&self, path: Option<&str>) -> Result<()> {
305        if let Some(p) = path {
306            if self.is_index_dirty(p).await {
307                anyhow::bail!(
308                    "Index for '{}' is dirty (previous indexing was interrupted). \
309                    Please re-run index_codebase to rebuild the index before querying.",
310                    p
311                );
312            }
313        }
314        Ok(())
315    }
316
317    /// Try to acquire an indexing lock for a given path
318    ///
319    /// This uses a two-layer locking strategy:
320    /// 1. Filesystem lock (flock) for cross-process coordination
321    /// 2. In-memory lock for broadcasting results to waiters in the same process
322    ///
323    /// Returns either:
324    /// - `IndexLockResult::Acquired(guard)` if we should perform the indexing
325    /// - `IndexLockResult::WaitForResult(receiver)` if another task in THIS process is indexing
326    /// - `IndexLockResult::WaitForFilesystemLock(path)` if ANOTHER PROCESS is indexing
327    ///
328    /// The lock is automatically released when the returned guard is dropped.
    pub(crate) async fn try_acquire_index_lock(&self, path: &str) -> Result<IndexLockResult> {
        use std::sync::atomic::Ordering;
        use std::time::Instant;

        // Normalize the path to ensure consistent locking across different path formats
        let normalized_path = Self::normalize_path(path)?;

        // STEP 1: Try to acquire filesystem lock first (cross-process coordination)
        // This must happen BEFORE checking in-memory state to prevent race conditions.
        // File locking is a blocking operation, so it runs on the blocking
        // thread pool rather than stalling an async runtime worker.
        let fs_lock = {
            let path_clone = normalized_path.clone();
            tokio::task::spawn_blocking(move || FsLockGuard::try_acquire(&path_clone))
                .await
                .context("Filesystem lock task panicked")??
        };

        // If we couldn't get the filesystem lock, another PROCESS is indexing
        let fs_lock = match fs_lock {
            Some(lock) => lock,
            None => {
                tracing::info!(
                    "Another process is indexing {} - returning WaitForFilesystemLock",
                    normalized_path
                );
                return Ok(IndexLockResult::WaitForFilesystemLock(normalized_path));
            }
        };

        // STEP 2: We have the filesystem lock, now check in-memory state
        // This handles the case where another task in THIS process is indexing

        // Acquire write lock on the ops map
        let mut ops = self.indexing_ops.write().await;

        // Check if an operation is already in progress for this path (in this process)
        if let Some(existing_op) = ops.get(&normalized_path) {
            // Check if the operation is stale (timed out or crashed)
            if existing_op.is_stale() {
                tracing::warn!(
                    "Removing stale indexing lock for {} (operation timed out after {:?})",
                    normalized_path,
                    existing_op.started_at.elapsed()
                );
                ops.remove(&normalized_path);
            } else if existing_op.active.load(Ordering::Acquire) {
                // Operation is still active and not stale, subscribe to receive the result
                // Note: We drop the filesystem lock here since we won't be indexing
                drop(fs_lock);
                let receiver = existing_op.result_tx.subscribe();
                tracing::info!(
                    "Indexing already in progress in this process for {} (started {:?} ago), waiting for result",
                    normalized_path,
                    existing_op.started_at.elapsed()
                );
                return Ok(IndexLockResult::WaitForResult(receiver));
            } else {
                // Operation completed but cleanup hasn't happened yet
                tracing::debug!(
                    "Removing completed indexing lock for {} (cleanup pending)",
                    normalized_path
                );
                ops.remove(&normalized_path);
            }
        }

        // STEP 3: We have both locks, register the operation

        // Create a new broadcast channel for this operation
        // Capacity of 1 is enough since we only send one result
        let (result_tx, _) = broadcast::channel(1);

        // Create the active flag - starts as true (active)
        let active_flag = Arc::new(std::sync::atomic::AtomicBool::new(true));

        // Register this operation with timestamp
        ops.insert(
            normalized_path.clone(),
            IndexingOperation {
                result_tx: result_tx.clone(),
                active: active_flag.clone(),
                started_at: Instant::now(),
            },
        );

        // Drop the write lock on the map so other callers are not blocked
        // while the guard is constructed
        drop(ops);

        // The guard removes the in-memory entry and releases the filesystem
        // lock (which it now owns) when dropped.
        Ok(IndexLockResult::Acquired(IndexLockGuard::new(
            normalized_path,
            self.indexing_ops.clone(),
            result_tx,
            active_flag,
            fs_lock,
        )))
    }
424
425    /// Index a codebase directory
426    ///
427    /// This automatically performs full indexing for new codebases or incremental
428    /// updates for previously indexed codebases.
429    ///
430    /// # Example
431    ///
432    /// ```no_run
433    /// use project_rag::{RagClient, IndexRequest};
434    ///
435    /// # async fn example() -> anyhow::Result<()> {
436    /// let client = RagClient::new().await?;
437    ///
438    /// let request = IndexRequest {
439    ///     path: "/path/to/code".to_string(),
440    ///     project: Some("my-project".to_string()),
441    ///     include_patterns: vec!["**/*.rs".to_string()],
442    ///     exclude_patterns: vec!["**/target/**".to_string()],
443    ///     max_file_size: 1_048_576,
444    /// };
445    ///
446    /// let response = client.index_codebase(request).await?;
447    /// println!("Indexed {} files in {} ms",
448    ///          response.files_indexed,
449    ///          response.duration_ms);
450    /// # Ok(())
451    /// # }
452    /// ```
453    pub async fn index_codebase(&self, request: IndexRequest) -> Result<IndexResponse> {
454        // Validate request
455        request.validate().map_err(|e| anyhow::anyhow!(e))?;
456
457        // Use the smart indexing logic without progress notifications
458        // Default cancellation token - not cancellable from this API
459        let cancel_token = tokio_util::sync::CancellationToken::new();
460        indexing::do_index_smart(
461            self,
462            request.path,
463            request.project,
464            request.include_patterns,
465            request.exclude_patterns,
466            request.max_file_size,
467            None, // No peer
468            None, // No progress token
469            cancel_token,
470        )
471        .await
472    }
473
474    /// Query the indexed codebase using semantic search
475    ///
476    /// # Example
477    ///
478    /// ```no_run
479    /// use project_rag::{RagClient, QueryRequest};
480    ///
481    /// # async fn example() -> anyhow::Result<()> {
482    /// let client = RagClient::new().await?;
483    ///
484    /// let request = QueryRequest {
485    ///     query: "authentication logic".to_string(),
486    ///     project: Some("my-project".to_string()),
487    ///     limit: 10,
488    ///     min_score: 0.7,
489    ///     hybrid: true,
490    /// };
491    ///
492    /// let response = client.query_codebase(request).await?;
493    /// for result in response.results {
494    ///     println!("Found in {}: {:.2}", result.file_path, result.score);
495    ///     println!("{}", result.content);
496    /// }
497    /// # Ok(())
498    /// # }
499    /// ```
500    pub async fn query_codebase(&self, request: QueryRequest) -> Result<QueryResponse> {
501        request.validate().map_err(|e| anyhow::anyhow!(e))?;
502
503        // Check if the target path is dirty (if path filter is specified)
504        self.check_path_not_dirty(request.path.as_deref()).await?;
505
506        let start = Instant::now();
507
508        let query_embedding = self
509            .embedding_provider
510            .embed_batch(vec![request.query.clone()])
511            .context("Failed to generate query embedding")?
512            .into_iter()
513            .next()
514            .ok_or_else(|| anyhow::anyhow!("No embedding generated"))?;
515
516        let original_threshold = request.min_score;
517        let mut threshold_used = original_threshold;
518        let mut threshold_lowered = false;
519
520        let mut results = self
521            .vector_db
522            .search(
523                query_embedding.clone(),
524                &request.query,
525                request.limit,
526                threshold_used,
527                request.project.clone(),
528                request.path.clone(),
529                request.hybrid,
530            )
531            .await
532            .context("Failed to search")?;
533
534        if results.is_empty() && original_threshold > 0.3 {
535            let fallback_thresholds = [0.6, 0.5, 0.4, 0.3];
536
537            for &threshold in &fallback_thresholds {
538                if threshold >= original_threshold {
539                    continue;
540                }
541
542                results = self
543                    .vector_db
544                    .search(
545                        query_embedding.clone(),
546                        &request.query,
547                        request.limit,
548                        threshold,
549                        request.project.clone(),
550                        request.path.clone(),
551                        request.hybrid,
552                    )
553                    .await
554                    .context("Failed to search")?;
555
556                if !results.is_empty() {
557                    threshold_used = threshold;
558                    threshold_lowered = true;
559                    break;
560                }
561            }
562        }
563
564        Ok(QueryResponse {
565            results,
566            duration_ms: start.elapsed().as_millis() as u64,
567            threshold_used,
568            threshold_lowered,
569        })
570    }
571
572    /// Advanced search with filters for file type, language, and path patterns
573    pub async fn search_with_filters(
574        &self,
575        request: AdvancedSearchRequest,
576    ) -> Result<QueryResponse> {
577        request.validate().map_err(|e| anyhow::anyhow!(e))?;
578
579        // Check if the target path is dirty (if path filter is specified)
580        self.check_path_not_dirty(request.path.as_deref()).await?;
581
582        let start = Instant::now();
583
584        let query_embedding = self
585            .embedding_provider
586            .embed_batch(vec![request.query.clone()])
587            .context("Failed to generate query embedding")?
588            .into_iter()
589            .next()
590            .ok_or_else(|| anyhow::anyhow!("No embedding generated"))?;
591
592        let original_threshold = request.min_score;
593        let mut threshold_used = original_threshold;
594        let mut threshold_lowered = false;
595
596        let mut results = self
597            .vector_db
598            .search_filtered(
599                query_embedding.clone(),
600                &request.query,
601                request.limit,
602                threshold_used,
603                request.project.clone(),
604                request.path.clone(),
605                true,
606                request.file_extensions.clone(),
607                request.languages.clone(),
608                request.path_patterns.clone(),
609            )
610            .await
611            .context("Failed to search with filters")?;
612
613        // Adaptive threshold lowering if no results found
614        if results.is_empty() && original_threshold > 0.3 {
615            let fallback_thresholds = [0.6, 0.5, 0.4, 0.3];
616
617            for &threshold in &fallback_thresholds {
618                if threshold >= original_threshold {
619                    continue;
620                }
621
622                results = self
623                    .vector_db
624                    .search_filtered(
625                        query_embedding.clone(),
626                        &request.query,
627                        request.limit,
628                        threshold,
629                        request.project.clone(),
630                        request.path.clone(),
631                        true,
632                        request.file_extensions.clone(),
633                        request.languages.clone(),
634                        request.path_patterns.clone(),
635                    )
636                    .await
637                    .context("Failed to search with filters")?;
638
639                if !results.is_empty() {
640                    threshold_used = threshold;
641                    threshold_lowered = true;
642                    break;
643                }
644            }
645        }
646
647        Ok(QueryResponse {
648            results,
649            duration_ms: start.elapsed().as_millis() as u64,
650            threshold_used,
651            threshold_lowered,
652        })
653    }
654
655    /// Get statistics about the indexed codebase
656    pub async fn get_statistics(&self) -> Result<StatisticsResponse> {
657        let stats = self
658            .vector_db
659            .get_statistics()
660            .await
661            .context("Failed to get statistics")?;
662
663        let language_breakdown = stats
664            .language_breakdown
665            .into_iter()
666            .map(|(language, count)| LanguageStats {
667                language,
668                file_count: count,
669                chunk_count: count,
670            })
671            .collect();
672
673        Ok(StatisticsResponse {
674            total_files: stats.total_points,
675            total_chunks: stats.total_vectors,
676            total_embeddings: stats.total_vectors,
677            database_size_bytes: 0,
678            language_breakdown,
679        })
680    }
681
682    /// Clear all indexed data from the vector database
683    pub async fn clear_index(&self) -> Result<ClearResponse> {
684        match self.vector_db.clear().await {
685            Ok(_) => {
686                let mut cache = self.hash_cache.write().await;
687                cache.roots.clear();
688
689                if let Err(e) = cache.save(&self.cache_path) {
690                    tracing::warn!("Failed to save cleared cache: {}", e);
691                }
692
693                if let Err(e) = self
694                    .vector_db
695                    .initialize(self.embedding_provider.dimension())
696                    .await
697                {
698                    Ok(ClearResponse {
699                        success: false,
700                        message: format!("Cleared but failed to reinitialize: {}", e),
701                    })
702                } else {
703                    Ok(ClearResponse {
704                        success: true,
705                        message: "Successfully cleared all indexed data and cache".to_string(),
706                    })
707                }
708            }
709            Err(e) => Ok(ClearResponse {
710                success: false,
711                message: format!("Failed to clear index: {}", e),
712            }),
713        }
714    }
715
716    /// Search git commit history using semantic search
717    ///
718    /// # Example
719    ///
720    /// ```no_run
721    /// use project_rag::{RagClient, SearchGitHistoryRequest};
722    ///
723    /// # async fn example() -> anyhow::Result<()> {
724    /// let client = RagClient::new().await?;
725    ///
726    /// let request = SearchGitHistoryRequest {
727    ///     query: "bug fix authentication".to_string(),
728    ///     path: "/path/to/repo".to_string(),
729    ///     project: None,
730    ///     branch: None,
731    ///     max_commits: 100,
732    ///     limit: 10,
733    ///     min_score: 0.7,
734    ///     author: None,
735    ///     since: None,
736    ///     until: None,
737    ///     file_pattern: None,
738    /// };
739    ///
740    /// let response = client.search_git_history(request).await?;
741    /// for result in response.results {
742    ///     println!("Commit {}: {}", result.commit_hash, result.commit_message);
743    /// }
744    /// # Ok(())
745    /// # }
746    /// ```
747    pub async fn search_git_history(
748        &self,
749        request: SearchGitHistoryRequest,
750    ) -> Result<SearchGitHistoryResponse> {
751        // Validate request
752        request.validate().map_err(|e| anyhow::anyhow!(e))?;
753
754        // Forward to git indexing implementation
755        git_indexing::do_search_git_history(
756            self.embedding_provider.clone(),
757            self.vector_db.clone(),
758            self.git_cache.clone(),
759            &self.git_cache_path,
760            request,
761        )
762        .await
763    }
764
765    /// Get the configuration used by this client
766    pub fn config(&self) -> &Config {
767        &self.config
768    }
769
    /// Get the embedding dimension used by this client
    ///
    /// This is the vector size produced by the configured embedding model and
    /// the dimension the vector database collections were initialized with.
    pub fn embedding_dimension(&self) -> usize {
        self.embedding_provider.dimension()
    }
774
775    /// Find the definition of a symbol at a given file location
776    ///
777    /// This method looks up the symbol at the specified location and returns
778    /// its definition information if found.
779    ///
780    /// # Arguments
781    ///
782    /// * `request` - The find definition request containing file path, line, and column
783    ///
784    /// # Returns
785    ///
786    /// A response containing the definition if found, along with precision info
787    pub async fn find_definition(&self, request: FindDefinitionRequest) -> Result<FindDefinitionResponse> {
788        let start = Instant::now();
789
790        // Validate request
791        request.validate().map_err(|e| anyhow::anyhow!(e))?;
792
793        // Create FileInfo for the file
794        let file_info = self.create_file_info(&request.file_path, request.project.clone())?;
795
796        // Get precision level for this language
797        let language = file_info.language.as_deref().unwrap_or("Unknown");
798        let precision = self.relations_provider.precision_level(language);
799
800        // Extract definitions from the file
801        let definitions = self
802            .relations_provider
803            .extract_definitions(&file_info)
804            .context("Failed to extract definitions")?;
805
806        // Find the definition at the requested position
807        let definition = definitions.into_iter().find(|def| {
808            request.line >= def.symbol_id.start_line
809                && request.line <= def.end_line
810                && (request.column == 0 || request.column >= def.symbol_id.start_col)
811        });
812
813        let result = definition.map(|def| DefinitionResult::from(&def));
814
815        Ok(FindDefinitionResponse {
816            definition: result,
817            precision: format!("{:?}", precision).to_lowercase(),
818            duration_ms: start.elapsed().as_millis() as u64,
819        })
820    }
821
822    /// Find all references to a symbol at a given file location
823    ///
824    /// This method finds all locations where the symbol at the given position
825    /// is referenced throughout the indexed codebase.
826    ///
827    /// # Arguments
828    ///
829    /// * `request` - The find references request containing file path, line, column, and limit
830    ///
831    /// # Returns
832    ///
833    /// A response containing the list of references found
834    pub async fn find_references(&self, request: FindReferencesRequest) -> Result<FindReferencesResponse> {
835        let start = Instant::now();
836
837        // Validate request
838        request.validate().map_err(|e| anyhow::anyhow!(e))?;
839
840        // Create FileInfo for the file
841        let file_info = self.create_file_info(&request.file_path, request.project.clone())?;
842
843        // Get precision level for this language
844        let language = file_info.language.as_deref().unwrap_or("Unknown");
845        let precision = self.relations_provider.precision_level(language);
846
847        // Extract definitions from the file to find the symbol at the position
848        let definitions = self
849            .relations_provider
850            .extract_definitions(&file_info)
851            .context("Failed to extract definitions")?;
852
853        // Find the symbol at the requested position
854        let target_symbol = definitions.iter().find(|def| {
855            request.line >= def.symbol_id.start_line
856                && request.line <= def.end_line
857                && (request.column == 0 || request.column >= def.symbol_id.start_col)
858        });
859
860        let symbol_name = target_symbol.map(|def| def.symbol_id.name.clone());
861
862        // If no symbol found at position, return empty result
863        if symbol_name.is_none() {
864            return Ok(FindReferencesResponse {
865                symbol_name: None,
866                references: Vec::new(),
867                total_count: 0,
868                precision: format!("{:?}", precision).to_lowercase(),
869                duration_ms: start.elapsed().as_millis() as u64,
870            });
871        }
872
873        let symbol_name_str = symbol_name.clone().unwrap();
874
875        // Build symbol index from definitions
876        let mut symbol_index: std::collections::HashMap<String, Vec<crate::relations::Definition>> =
877            std::collections::HashMap::new();
878        for def in definitions {
879            symbol_index
880                .entry(def.symbol_id.name.clone())
881                .or_default()
882                .push(def);
883        }
884
885        // Find references in the same file
886        let references = self
887            .relations_provider
888            .extract_references(&file_info, &symbol_index)
889            .context("Failed to extract references")?;
890
891        // Filter to references matching our target symbol
892        let matching_refs: Vec<ReferenceResult> = references
893            .iter()
894            .filter(|r| {
895                // Check if this reference points to our target symbol
896                r.target_symbol_id.contains(&symbol_name_str)
897            })
898            .take(request.limit)
899            .map(|r| ReferenceResult::from(r))
900            .collect();
901
902        let total_count = matching_refs.len();
903
904        Ok(FindReferencesResponse {
905            symbol_name,
906            references: matching_refs,
907            total_count,
908            precision: format!("{:?}", precision).to_lowercase(),
909            duration_ms: start.elapsed().as_millis() as u64,
910        })
911    }
912
913    /// Get the call graph for a function at a given file location
914    ///
915    /// This method returns the callers (incoming calls) and callees (outgoing calls)
916    /// for the function at the specified location.
917    ///
918    /// # Arguments
919    ///
920    /// * `request` - The call graph request containing file path, line, column, and depth
921    ///
922    /// # Returns
923    ///
924    /// A response containing the root symbol and its call graph
925    pub async fn get_call_graph(&self, request: GetCallGraphRequest) -> Result<GetCallGraphResponse> {
926        let start = Instant::now();
927
928        // Validate request
929        request.validate().map_err(|e| anyhow::anyhow!(e))?;
930
931        // Create FileInfo for the file
932        let file_info = self.create_file_info(&request.file_path, request.project.clone())?;
933
934        // Get precision level for this language
935        let language = file_info.language.as_deref().unwrap_or("Unknown");
936        let precision = self.relations_provider.precision_level(language);
937
938        // Extract definitions from the file to find the function at the position
939        let definitions = self
940            .relations_provider
941            .extract_definitions(&file_info)
942            .context("Failed to extract definitions")?;
943
944        // Find the function at the requested position
945        let target_function = definitions.iter().find(|def| {
946            // Only consider functions/methods
947            matches!(
948                def.symbol_id.kind,
949                crate::relations::SymbolKind::Function | crate::relations::SymbolKind::Method
950            ) && request.line >= def.symbol_id.start_line
951                && request.line <= def.end_line
952                && (request.column == 0 || request.column >= def.symbol_id.start_col)
953        });
954
955        // If no function found at position, return empty result
956        let root_symbol = match target_function {
957            Some(func) => crate::relations::SymbolInfo {
958                name: func.symbol_id.name.clone(),
959                kind: func.symbol_id.kind.clone(),
960                file_path: request.file_path.clone(),
961                start_line: func.symbol_id.start_line,
962                end_line: func.end_line,
963                signature: func.signature.clone(),
964            },
965            None => {
966                return Ok(GetCallGraphResponse {
967                    root_symbol: None,
968                    callers: Vec::new(),
969                    callees: Vec::new(),
970                    precision: format!("{:?}", precision).to_lowercase(),
971                    duration_ms: start.elapsed().as_millis() as u64,
972                });
973            }
974        };
975
976        let function_name = root_symbol.name.clone();
977
978        // Build symbol index from definitions
979        let mut symbol_index: std::collections::HashMap<String, Vec<crate::relations::Definition>> =
980            std::collections::HashMap::new();
981        for def in &definitions {
982            symbol_index
983                .entry(def.symbol_id.name.clone())
984                .or_default()
985                .push(def.clone());
986        }
987
988        // Find references in the same file to identify callers
989        let references = self
990            .relations_provider
991            .extract_references(&file_info, &symbol_index)
992            .context("Failed to extract references")?;
993
994        // Find callers (references with Call kind pointing to our function)
995        let mut seen_callers = std::collections::HashSet::new();
996        let callers: Vec<crate::relations::CallGraphNode> = references
997            .iter()
998            .filter(|r| {
999                r.reference_kind == crate::relations::ReferenceKind::Call
1000                    && r.target_symbol_id.contains(&function_name)
1001            })
1002            .filter_map(|r| {
1003                // Try to find which function contains this call
1004                definitions.iter().find(|def| {
1005                    matches!(
1006                        def.symbol_id.kind,
1007                        crate::relations::SymbolKind::Function | crate::relations::SymbolKind::Method
1008                    ) && r.start_line >= def.symbol_id.start_line
1009                        && r.start_line <= def.end_line
1010                })
1011            })
1012            .filter(|def| seen_callers.insert(def.symbol_id.name.clone()))
1013            .map(|def| crate::relations::CallGraphNode {
1014                name: def.symbol_id.name.clone(),
1015                kind: def.symbol_id.kind.clone(),
1016                file_path: request.file_path.clone(),
1017                line: def.symbol_id.start_line,
1018                children: Vec::new(),
1019            })
1020            .collect();
1021
1022        // Find callees (calls made from within our function)
1023        let target_func = target_function.unwrap();
1024        let mut seen_callees = std::collections::HashSet::new();
1025        let callees: Vec<crate::relations::CallGraphNode> = references
1026            .iter()
1027            .filter(|r| {
1028                r.reference_kind == crate::relations::ReferenceKind::Call
1029                    && r.start_line >= target_func.symbol_id.start_line
1030                    && r.start_line <= target_func.end_line
1031            })
1032            .filter_map(|r| {
1033                // Extract the called function name from target_symbol_id
1034                let parts: Vec<&str> = r.target_symbol_id.split(':').collect();
1035                if parts.len() >= 2 {
1036                    Some(parts[1].to_string())
1037                } else {
1038                    None
1039                }
1040            })
1041            .filter(|name| seen_callees.insert(name.clone()))
1042            .filter_map(|name| {
1043                // Find the definition of the called function
1044                symbol_index.get(&name).and_then(|defs| defs.first()).cloned()
1045            })
1046            .map(|def| crate::relations::CallGraphNode {
1047                name: def.symbol_id.name.clone(),
1048                kind: def.symbol_id.kind.clone(),
1049                file_path: request.file_path.clone(),
1050                line: def.symbol_id.start_line,
1051                children: Vec::new(),
1052            })
1053            .collect();
1054
1055        Ok(GetCallGraphResponse {
1056            root_symbol: Some(root_symbol),
1057            callers,
1058            callees,
1059            precision: format!("{:?}", precision).to_lowercase(),
1060            duration_ms: start.elapsed().as_millis() as u64,
1061        })
1062    }
1063}
1064
1065// Indexing operations module
1066pub(crate) mod indexing;
1067// Git indexing operations module
1068pub(crate) mod git_indexing;
1069
1070#[cfg(test)]
1071mod tests;