project_rag/
lib.rs

1//! # Project RAG - RAG-based Codebase Indexing and Semantic Search
2//!
3//! A dual-purpose Rust library and MCP server for semantic code search using RAG
4//! (Retrieval-Augmented Generation).
5//!
6//! ## Overview
7//!
8//! Project RAG combines vector embeddings with BM25 keyword search to enable semantic
9//! code search across large projects. It supports incremental indexing, git history search,
10//! and provides both a Rust library API and an MCP server for AI assistant integration.
11//!
12//! ## Architecture
13//!
14//! - **RagClient**: Core library containing all functionality (embeddings, vector DB, indexing, search)
15//! - **RagMcpServer**: Thin wrapper around RagClient that exposes functionality via MCP protocol
16//! - Both library and MCP server are always built together - no feature flags needed
17//!
18//! ## Key Features
19//!
20//! - **Semantic Search**: FastEmbed (all-MiniLM-L6-v2) for local embeddings
21//! - **Hybrid Search**: Combines vector similarity with BM25 keyword matching via Reciprocal Rank Fusion (RRF)
22//! - **Dual Database Support**: LanceDB (embedded, default) or Qdrant (external server)
23//! - **Smart Indexing**: Auto-detects full vs incremental updates with persistent caching
24//! - **AST-Based Chunking**: Tree-sitter parsing for 12 programming languages
25//! - **Git History Search**: Semantic search over commit history with on-demand indexing
26//! - **Dual API**: Use as a Rust library or as an MCP server for AI assistants
27//!
28//! ## Library Usage Example
29//!
30//! ```no_run
31//! use project_rag::{RagClient, IndexRequest, QueryRequest};
32//!
33//! #[tokio::main]
34//! async fn main() -> anyhow::Result<()> {
35//!     // Create client with default configuration
36//!     let client = RagClient::new().await?;
37//!
38//!     // Index a codebase
39//!     let index_req = IndexRequest {
40//!         path: "/path/to/codebase".to_string(),
41//!         project: Some("my-project".to_string()),
42//!         include_patterns: vec!["**/*.rs".to_string()],
43//!         exclude_patterns: vec!["**/target/**".to_string()],
44//!         max_file_size: 1_048_576,
45//!     };
46//!     let index_response = client.index_codebase(index_req).await?;
47//!     println!("Indexed {} files", index_response.files_indexed);
48//!
49//!     // Query the codebase
50//!     let query_req = QueryRequest {
51//!         query: "authentication logic".to_string(),
52//!         project: Some("my-project".to_string()),
53//!         limit: 10,
54//!         min_score: 0.7,
55//!         hybrid: true,
56//!     };
57//!     let query_response = client.query_codebase(query_req).await?;
58//!     for result in query_response.results {
59//!         println!("Found in {}: score {}", result.file_path, result.score);
60//!     }
61//!
62//!     Ok(())
63//! }
64//! ```
65//!
66//! ## MCP Server Usage Example
67//!
68//! The MCP server wraps RagClient and exposes it via the MCP protocol:
69//!
70//! ```no_run
71//! use project_rag::mcp_server::RagMcpServer;
72//!
73//! #[tokio::main]
74//! async fn main() -> anyhow::Result<()> {
75//!     // Create server (internally creates a RagClient)
76//!     let server = RagMcpServer::new().await?;
77//!
78//!     // Serve over stdio (MCP protocol)
79//!     server.serve_stdio().await?;
80//!
81//!     Ok(())
82//! }
83//! ```
84//!
85//! Or you can create a server with an existing client:
86//!
87//! ```no_run
88//! use project_rag::{RagClient, mcp_server::RagMcpServer};
89//! use std::sync::Arc;
90//!
91//! #[tokio::main]
92//! async fn main() -> anyhow::Result<()> {
93//!     // Create the client yourself (default configuration shown here)
94//!     let client = RagClient::new().await?;
95//!
96//!     // Wrap client in MCP server
97//!     let server = RagMcpServer::with_client(Arc::new(client))?;
98//!
99//!     server.serve_stdio().await?;
100//!     Ok(())
101//! }
102//! ```
103//!
104//! ## Modules
105//!
106//! - [`client`]: Core library client API with all functionality
107//! - [`mcp_server`]: MCP protocol server implementation that wraps the client
108//! - [`embedding`]: Embedding generation using FastEmbed
109//! - [`vector_db`]: Vector database abstraction (LanceDB and Qdrant)
110//! - [`bm25_search`]: BM25 keyword search using Tantivy
111//! - [`indexer`]: File walking, AST parsing, and code chunking
112//! - [`git`]: Git history walking and commit chunking
113//! - [`cache`]: Persistent hash cache for incremental updates
114//! - [`git_cache`]: Git commit tracking cache
115//! - [`config`]: Configuration management with environment variable support
116//! - [`types`]: Request/response types with validation
117//! - [`error`]: Error types and result aliases
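118//! - [`paths`]: Path normalization utilities
//! - [`glob_utils`]: Glob pattern matching utilities for path filtering
//! - [`relations`]: Code relationships (definitions, references, call graphs)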
119
120// Core modules (always available; the crate uses no feature flags)
121/// BM25 keyword search using Tantivy; the keyword-matching half of hybrid search
122pub mod bm25_search;
123
124/// Persistent hash cache for tracking file changes across restarts (enables incremental updates)
125pub mod cache;
126
127/// Configuration management with environment variable overrides
128pub mod config;
129
130/// Local embedding generation using FastEmbed (all-MiniLM-L6-v2)
131pub mod embedding;
132
133/// Error types, result aliases, and related utilities
134pub mod error;
135
136/// Git repository/history walking, commit extraction, and commit chunking
137pub mod git;
138
139/// Git commit tracking cache for incremental git history indexing
140pub mod git_cache;
141
142/// Glob pattern matching utilities for path filtering
143pub mod glob_utils;
144
145/// File walking, code chunking, and AST parsing (Tree-sitter)
146pub mod indexer;
147
148/// Path normalization and utility functions
149pub mod paths;
150
151/// Code relationships: definitions, references, call graphs
152pub mod relations;
153
154/// Request/response types with validation
155pub mod types;
156
157/// Vector database abstraction supporting LanceDB (embedded, default) and Qdrant (external server)
158pub mod vector_db;
159
160/// Library client API: [`RagClient`], the core functionality of the crate
161pub mod client;
162pub use client::RagClient;
163
164/// MCP server: thin wrapper that exposes [`RagClient`] via the MCP protocol
165pub mod mcp_server;
166
167// Re-export commonly used types for convenience
168pub use types::{
169    AdvancedSearchRequest, ClearRequest, ClearResponse, FindDefinitionRequest,
170    FindDefinitionResponse, FindReferencesRequest, FindReferencesResponse, GetCallGraphRequest,
171    GetCallGraphResponse, GitSearchResult, IndexRequest, IndexResponse, IndexingMode,
172    LanguageStats, QueryRequest, QueryResponse, SearchGitHistoryRequest, SearchGitHistoryResponse,
173    SearchResult, StatisticsRequest, StatisticsResponse,
174};
175
176pub use config::Config;
177pub use error::RagError;