project_rag/lib.rs
1//! # Project RAG - RAG-based Codebase Indexing and Semantic Search
2//!
3//! A dual-purpose Rust library and MCP server for semantic code search using RAG
4//! (Retrieval-Augmented Generation).
5//!
6//! ## Overview
7//!
8//! Project RAG combines vector embeddings with BM25 keyword search to enable semantic
9//! code search across large projects. It supports incremental indexing, git history search,
10//! and provides both a Rust library API and an MCP server for AI assistant integration.
11//!
12//! ## Architecture
13//!
14//! - **RagClient**: Core library containing all functionality (embeddings, vector DB, indexing, search)
15//! - **RagMcpServer**: Thin wrapper around RagClient that exposes functionality via MCP protocol
16//! - Both the library and the MCP server are always built together; no feature flags are needed
17//!
18//! ## Key Features
19//!
20//! - **Semantic Search**: FastEmbed (all-MiniLM-L6-v2) for local embeddings
21//! - **Hybrid Search**: Combines vector similarity with BM25 keyword matching (RRF)
22//! - **Dual Database Support**: LanceDB (embedded, default) or Qdrant (external server)
23//! - **Smart Indexing**: Auto-detects full vs incremental updates with persistent caching
24//! - **AST-Based Chunking**: Tree-sitter parsing for 12 programming languages
25//! - **Git History Search**: Semantic search over commit history with on-demand indexing
26//! - **Dual API**: Use as a Rust library or as an MCP server for AI assistants
27//!
28//! ## Library Usage Example
29//!
30//! ```no_run
31//! use project_rag::{RagClient, IndexRequest, QueryRequest};
32//!
33//! #[tokio::main]
34//! async fn main() -> anyhow::Result<()> {
35//!     // Create client with default configuration
36//!     let client = RagClient::new().await?;
37//!
38//!     // Index a codebase
39//!     let index_req = IndexRequest {
40//!         path: "/path/to/codebase".to_string(),
41//!         project: Some("my-project".to_string()),
42//!         include_patterns: vec!["**/*.rs".to_string()],
43//!         exclude_patterns: vec!["**/target/**".to_string()],
44//!         max_file_size: 1_048_576,
45//!     };
46//!     let index_response = client.index_codebase(index_req).await?;
47//!     println!("Indexed {} files", index_response.files_indexed);
48//!
49//!     // Query the codebase
50//!     let query_req = QueryRequest {
51//!         query: "authentication logic".to_string(),
52//!         project: Some("my-project".to_string()),
53//!         limit: 10,
54//!         min_score: 0.7,
55//!         hybrid: true,
56//!     };
57//!     let query_response = client.query_codebase(query_req).await?;
58//!     for result in query_response.results {
59//!         println!("Found in {}: score {}", result.file_path, result.score);
60//!     }
61//!
62//!     Ok(())
63//! }
64//! ```
65//!
66//! ## MCP Server Usage Example
67//!
68//! The MCP server wraps RagClient and exposes it via the MCP protocol:
69//!
70//! ```no_run
71//! use project_rag::mcp_server::RagMcpServer;
72//!
73//! #[tokio::main]
74//! async fn main() -> anyhow::Result<()> {
75//! // Create server (internally creates a RagClient)
76//! let server = RagMcpServer::new().await?;
77//!
78//! // Serve over stdio (MCP protocol)
79//! server.serve_stdio().await?;
80//!
81//! Ok(())
82//! }
83//! ```
84//!
85//! Or you can create a server with an existing client:
86//!
87//! ```no_run
88//! use project_rag::{RagClient, mcp_server::RagMcpServer};
89//! use std::sync::Arc;
90//!
91//! #[tokio::main]
92//! async fn main() -> anyhow::Result<()> {
93//!     // Create the client first (default configuration shown here)
94//!     let client = RagClient::new().await?;
95//!
96//!     // Wrap the existing client in an MCP server
97//!     let server = RagMcpServer::with_client(Arc::new(client))?;
98//!
99//!     server.serve_stdio().await?;
100//!     Ok(())
101//! }
102//! ```
103//!
104//! ## Modules
105//!
106//! - [`client`]: Core library client API with all functionality
107//! - [`mcp_server`]: MCP protocol server implementation that wraps the client
108//! - [`embedding`]: Embedding generation using FastEmbed
109//! - [`vector_db`]: Vector database abstraction (LanceDB and Qdrant)
110//! - [`bm25_search`]: BM25 keyword search using Tantivy
111//! - [`indexer`]: File walking, AST parsing, and code chunking
112//! - [`git`]: Git history walking and commit chunking
113//! - [`cache`]: Persistent hash cache for incremental updates
114//! - [`git_cache`]: Git commit tracking cache
115//! - [`config`]: Configuration management with environment variable support
116//! - [`types`]: Request/response types with validation
117//! - [`error`]: Error types and result aliases
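118//! - [`paths`]: Path normalization utilities
//! - [`glob_utils`]: Glob pattern matching utilities for path filtering
//! - [`relations`]: Code relationships: definitions, references, call graphs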
119
120// Core modules (always available)
121/// BM25 keyword search using Tantivy for hybrid search
122pub mod bm25_search;
123
124/// Persistent hash cache for tracking file changes across restarts
125pub mod cache;
126
127/// Configuration management with environment variable overrides
128pub mod config;
129
130/// Embedding generation using FastEmbed (all-MiniLM-L6-v2)
131pub mod embedding;
132
133/// Error types and utilities
134pub mod error;
135
136/// Git repository walking and commit extraction
137pub mod git;
138
139/// Git commit tracking cache for incremental git history indexing
140pub mod git_cache;
141
142/// Glob pattern matching utilities for path filtering
143pub mod glob_utils;
144
145/// File walking, code chunking, and AST parsing
146pub mod indexer;
147
148/// Path normalization and utility functions
149pub mod paths;
150
151/// Code relationships: definitions, references, call graphs
152pub mod relations;
153
154/// Request/response types with validation
155pub mod types;
156
157/// Vector database abstraction supporting LanceDB and Qdrant
158pub mod vector_db;
159
160/// Library client API: the core `RagClient` with all functionality
161pub mod client;
162pub use client::RagClient;
163
164/// MCP server: thin wrapper that exposes the client via the MCP protocol
165pub mod mcp_server;
166
167// Re-export commonly used types for convenience
168pub use types::{
169 AdvancedSearchRequest, ClearRequest, ClearResponse, FindDefinitionRequest,
170 FindDefinitionResponse, FindReferencesRequest, FindReferencesResponse, GetCallGraphRequest,
171 GetCallGraphResponse, GitSearchResult, IndexRequest, IndexResponse, IndexingMode,
172 LanguageStats, QueryRequest, QueryResponse, SearchGitHistoryRequest, SearchGitHistoryResponse,
173 SearchResult, StatisticsRequest, StatisticsResponse,
174};
175
176pub use config::Config;
177pub use error::RagError;