1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
//! # Project RAG - RAG-based Codebase Indexing and Semantic Search
//!
//! A dual-purpose Rust library and MCP server for semantic code search using RAG
//! (Retrieval-Augmented Generation).
//!
//! ## Overview
//!
//! Project RAG combines vector embeddings with BM25 keyword search to enable semantic
//! code search across large projects. It supports incremental indexing, git history search,
//! and provides both a Rust library API and an MCP server for AI assistant integration.
//!
//! ## Architecture
//!
//! - **RagClient**: Core library containing all functionality (embeddings, vector DB, indexing, search)
//! - **RagMcpServer**: Thin wrapper around RagClient that exposes functionality via MCP protocol
//! - The library and the MCP server are always built together; no feature flags are needed
//!
//! ## Key Features
//!
//! - **Semantic Search**: FastEmbed (all-MiniLM-L6-v2) for local embeddings
//! - **Hybrid Search**: Combines vector similarity with BM25 keyword matching via Reciprocal Rank Fusion (RRF)
//! - **Dual Database Support**: LanceDB (embedded, default) or Qdrant (external server)
//! - **Smart Indexing**: Auto-detects full vs incremental updates with persistent caching
//! - **AST-Based Chunking**: Tree-sitter parsing for 12 programming languages
//! - **Git History Search**: Semantic search over commit history with on-demand indexing
//! - **Dual API**: Use as a Rust library or as an MCP server for AI assistants
//!
//! ## Library Usage Example
//!
//! ```no_run
//! use project_rag::{RagClient, IndexRequest, QueryRequest};
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     // Create client with default configuration
//!     let client = RagClient::new().await?;
//!
//!     // Index a codebase
//!     let index_req = IndexRequest {
//!         path: "/path/to/codebase".to_string(),
//!         project: Some("my-project".to_string()),
//!         include_patterns: vec!["**/*.rs".to_string()],
//!         exclude_patterns: vec!["**/target/**".to_string()],
//!         max_file_size: 1_048_576,
//!     };
//!     let index_response = client.index_codebase(index_req).await?;
//!     println!("Indexed {} files", index_response.files_indexed);
//!
//!     // Query the codebase
//!     let query_req = QueryRequest {
//!         query: "authentication logic".to_string(),
//!         project: Some("my-project".to_string()),
//!         limit: 10,
//!         min_score: 0.7,
//!         hybrid: true,
//!     };
//!     let query_response = client.query_codebase(query_req).await?;
//!     for result in query_response.results {
//!         println!("Found in {}: score {}", result.file_path, result.score);
//!     }
//!
//!     Ok(())
//! }
//! ```
//!
//! ## MCP Server Usage Example
//!
//! The MCP server wraps RagClient and exposes it via the MCP protocol:
//!
//! ```no_run
//! use project_rag::mcp_server::RagMcpServer;
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     // Create server (internally creates a RagClient)
//!     let server = RagMcpServer::new().await?;
//!
//!     // Serve over stdio (MCP protocol)
//!     server.serve_stdio().await?;
//!
//!     Ok(())
//! }
//! ```
//!
//! Or you can create a server with an existing client:
//!
//! ```no_run
//! use project_rag::{RagClient, mcp_server::RagMcpServer};
//! use std::sync::Arc;
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     // Create the client yourself (default configuration shown here)
//!     let client = RagClient::new().await?;
//!
//!     // Wrap client in MCP server
//!     let server = RagMcpServer::with_client(Arc::new(client))?;
//!
//!     server.serve_stdio().await?;
//!     Ok(())
//! }
//! ```
//!
//! ## Modules
//!
//! - [`client`]: Core library client API with all functionality
//! - [`mcp_server`]: MCP protocol server implementation that wraps the client
//! - [`embedding`]: Embedding generation using FastEmbed
//! - [`vector_db`]: Vector database abstraction (LanceDB and Qdrant)
//! - [`bm25_search`]: BM25 keyword search using Tantivy
//! - [`indexer`]: File walking, AST parsing, and code chunking
//! - [`git`]: Git history walking and commit chunking
//! - [`cache`]: Persistent hash cache for incremental updates
//! - [`git_cache`]: Git commit tracking cache
//! - [`config`]: Configuration management with environment variable support
//! - [`types`]: Request/response types with validation
//! - [`error`]: Error types and result aliases
//! - [`paths`]: Path normalization utilities
// Core modules (always available)
/// BM25 keyword search using Tantivy for hybrid search
/// Persistent hash cache for tracking file changes across restarts
/// Configuration management with environment variable overrides
/// Embedding generation using FastEmbed (all-MiniLM-L6-v2)
/// Error types and utilities
/// Git repository walking and commit extraction
/// Git commit tracking cache for incremental git history indexing
/// Glob pattern matching utilities for path filtering
/// File walking, code chunking, and AST parsing
/// Path normalization and utility functions
/// Code relationships: definitions, references, call graphs
/// Request/response types with validation
/// Vector database abstraction supporting LanceDB and Qdrant
// Library client API (core functionality)
pub use RagClient;
// MCP server (wraps the client and exposes via MCP protocol)
// Re-export commonly used types for convenience
pub use ;
pub use Config;
pub use RagError;