project-rag 0.1.0

RAG-based codebase indexing and semantic search - dual purpose library and MCP server
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
use crate::client::RagClient;
use crate::types::*;

use anyhow::{Context, Result};
use rmcp::{
    ErrorData as McpError, Peer, RoleServer, ServerHandler, ServiceExt,
    handler::server::{router::prompt::PromptRouter, tool::ToolRouter, wrapper::Parameters},
    model::*,
    prompt, prompt_handler, prompt_router,
    service::RequestContext,
    tool, tool_handler, tool_router,
};
use std::sync::Arc;
use tokio_util::sync::CancellationToken;

/// Scope guard that fires a [`CancellationToken`] when it is dropped.
///
/// Keeping one of these alive inside an async handler means the token is cancelled
/// whenever the guard goes away: when the handler returns, and also when the handler's
/// future is dropped early (e.g. because the client disconnected). Work that observes
/// the token can then stop cooperatively.
struct CancelOnDropGuard {
    /// Token that `Drop` cancels.
    token: CancellationToken,
}

impl CancelOnDropGuard {
    /// Wraps `token` so that it is cancelled when the returned guard is dropped.
    fn new(token: CancellationToken) -> Self {
        CancelOnDropGuard { token }
    }
}

impl Drop for CancelOnDropGuard {
    /// Cancels the wrapped token and logs at info level, unless it is already cancelled.
    fn drop(&mut self) {
        if self.token.is_cancelled() {
            // Someone else already cancelled it; nothing left to do.
            return;
        }

        tracing::info!("Tool handler dropped, triggering cancellation");
        self.token.cancel();
    }
}

/// MCP server that exposes a [`RagClient`] through tool and prompt handlers.
///
/// Clones share the same client, which is held behind an `Arc`.
#[derive(Clone)]
pub struct RagMcpServer {
    /// Shared client that the tool handlers delegate to.
    client: Arc<RagClient>,
    /// Tool router, populated from `Self::tool_router()` at construction.
    tool_router: ToolRouter<Self>,
    /// Prompt router, populated from `Self::prompt_router()` at construction.
    prompt_router: PromptRouter<Self>,
}

impl RagMcpServer {
    /// Create a new RAG MCP server with default configuration
    pub async fn new() -> Result<Self> {
        let client = RagClient::new().await?;
        Self::with_client(Arc::new(client))
    }

    /// Create a new RAG MCP server with an existing client
    pub fn with_client(client: Arc<RagClient>) -> Result<Self> {
        Ok(Self {
            client,
            tool_router: Self::tool_router(),
            prompt_router: Self::prompt_router(),
        })
    }

    /// Get the underlying client
    pub fn client(&self) -> &RagClient {
        &self.client
    }

    /// Create a new RAG MCP server with custom configuration
    pub async fn with_config(config: crate::config::Config) -> Result<Self> {
        let client = RagClient::with_config(config).await?;
        Self::with_client(Arc::new(client))
    }

    /// Normalize a path to a canonical absolute form for consistent cache lookups
    pub fn normalize_path(path: &str) -> Result<String> {
        RagClient::normalize_path(path)
    }

    /// Index a codebase directory (convenience method for testing)
    #[allow(clippy::too_many_arguments)]
    pub async fn do_index(
        &self,
        path: String,
        project: Option<String>,
        include_patterns: Vec<String>,
        exclude_patterns: Vec<String>,
        max_file_size: usize,
        peer: Option<Peer<RoleServer>>,
        progress_token: Option<ProgressToken>,
        cancel_token: Option<CancellationToken>,
    ) -> Result<IndexResponse> {
        let cancel_token = cancel_token.unwrap_or_default();
        crate::client::indexing::do_index_smart(
            &self.client,
            path,
            project,
            include_patterns,
            exclude_patterns,
            max_file_size,
            peer,
            progress_token,
            cancel_token,
        )
        .await
    }
}

#[tool_router(router = tool_router)]
impl RagMcpServer {
    // Tool handler: validates the request, runs `do_index_smart`, and returns the
    // response as pretty-printed JSON. Failures are reported as strings; indexing
    // errors use the alternate (`{:#}`) format so the full error chain is shown.
    #[tool(
        description = "Index a codebase directory, creating embeddings for semantic search. Automatically performs full indexing for new codebases or incremental updates for previously indexed codebases."
    )]
    async fn index_codebase(
        &self,
        meta: Meta,
        peer: Peer<RoleServer>,
        Parameters(req): Parameters<IndexRequest>,
    ) -> Result<String, String> {
        // Reject invalid requests before any work starts.
        req.validate()?;

        // Progress token from the request metadata, if the caller supplied one.
        let progress_token = meta.get_progress_token();

        // One token is shared by the indexing call and the guard. The guard cancels it
        // when it is dropped, which happens when this handler returns and also when the
        // handler's future is dropped early (e.g. the client disconnects).
        let index_token = CancellationToken::new();
        let _cancel_guard = CancelOnDropGuard::new(index_token.clone());

        let indexed = crate::client::indexing::do_index_smart(
            &self.client,
            req.path,
            req.project,
            req.include_patterns,
            req.exclude_patterns,
            req.max_file_size,
            Some(peer),
            progress_token,
            index_token,
        )
        .await;

        match indexed {
            Ok(response) => serde_json::to_string_pretty(&response)
                .map_err(|err| format!("Serialization failed: {}", err)),
            Err(err) => Err(format!("{:#}", err)),
        }
    }

    // Tool handler: validates the request, forwards it to the client's
    // `query_codebase`, and returns the response as pretty-printed JSON.
    // Errors are reported as strings.
    #[tool(description = "Query the indexed codebase using semantic search")]
    async fn query_codebase(
        &self,
        Parameters(req): Parameters<QueryRequest>,
    ) -> Result<String, String> {
        req.validate()?;

        self.client
            .query_codebase(req)
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|results| {
                serde_json::to_string_pretty(&results)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: asks the client for index statistics and returns them as
    // pretty-printed JSON. The request payload itself is not used.
    #[tool(description = "Get statistics about the indexed codebase")]
    async fn get_statistics(
        &self,
        Parameters(_req): Parameters<StatisticsRequest>,
    ) -> Result<String, String> {
        self.client
            .get_statistics()
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|stats| {
                serde_json::to_string_pretty(&stats)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: asks the client to clear the index and returns its response as
    // pretty-printed JSON. The request payload itself is not used.
    #[tool(description = "Clear all indexed data from the vector database")]
    async fn clear_index(
        &self,
        Parameters(_req): Parameters<ClearRequest>,
    ) -> Result<String, String> {
        self.client
            .clear_index()
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|cleared| {
                serde_json::to_string_pretty(&cleared)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: validates the request, forwards it to the client's
    // `search_with_filters`, and returns the response as pretty-printed JSON.
    // Errors are reported as strings.
    #[tool(description = "Advanced search with filters for file type, language, and path patterns")]
    async fn search_by_filters(
        &self,
        Parameters(req): Parameters<AdvancedSearchRequest>,
    ) -> Result<String, String> {
        req.validate()?;

        self.client
            .search_with_filters(req)
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|matches| {
                serde_json::to_string_pretty(&matches)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: validates the request, forwards it to the client's
    // `search_git_history`, and returns the response as pretty-printed JSON.
    // Errors are reported as strings.
    #[tool(description = "Search git commit history using semantic search with on-demand indexing")]
    async fn search_git_history(
        &self,
        Parameters(req): Parameters<SearchGitHistoryRequest>,
    ) -> Result<String, String> {
        req.validate()?;

        self.client
            .search_git_history(req)
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|commits| {
                serde_json::to_string_pretty(&commits)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: validates the request, forwards it to the client's
    // `find_definition`, and returns the response as pretty-printed JSON.
    // Errors are reported as strings.
    #[tool(description = "Find the definition of a symbol at a given file location (line and column)")]
    async fn find_definition(
        &self,
        Parameters(req): Parameters<FindDefinitionRequest>,
    ) -> Result<String, String> {
        req.validate()?;

        self.client
            .find_definition(req)
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|definition| {
                serde_json::to_string_pretty(&definition)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: validates the request, forwards it to the client's
    // `find_references`, and returns the response as pretty-printed JSON.
    // Errors are reported as strings.
    #[tool(description = "Find all references to a symbol at a given file location")]
    async fn find_references(
        &self,
        Parameters(req): Parameters<FindReferencesRequest>,
    ) -> Result<String, String> {
        req.validate()?;

        self.client
            .find_references(req)
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|references| {
                serde_json::to_string_pretty(&references)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }

    // Tool handler: validates the request, forwards it to the client's
    // `get_call_graph`, and returns the response as pretty-printed JSON.
    // Errors are reported as strings.
    #[tool(description = "Get the call graph for a function at a given file location (callers and callees)")]
    async fn get_call_graph(
        &self,
        Parameters(req): Parameters<GetCallGraphRequest>,
    ) -> Result<String, String> {
        req.validate()?;

        self.client
            .get_call_graph(req)
            .await
            .map_err(|err| format!("{:#}", err))
            .and_then(|graph| {
                serde_json::to_string_pretty(&graph)
                    .map_err(|err| format!("Serialization failed: {}", err))
            })
    }
}

// Prompts for slash commands
#[prompt_router]
impl RagMcpServer {
    // Prompt "index": produces a described prompt result containing a single user
    // message asking to index the codebase at the `path` argument.
    #[prompt(
        name = "index",
        description = "Index a codebase directory to enable semantic search (automatically performs full or incremental based on existing index)"
    )]
    async fn index_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<GetPromptResult, McpError> {
        // Fall back to the current directory when "path" is missing or not a string.
        let target = args
            .get("path")
            .and_then(serde_json::Value::as_str)
            .unwrap_or(".");

        let request_text = format!(
            "Please index the codebase at path: '{}'. This will automatically perform a full index if this is the first time, or an incremental update if the codebase has been indexed before.",
            target
        );
        let summary = format!(
            "Index codebase at {} (auto-detects full/incremental)",
            target
        );

        Ok(GetPromptResult {
            description: Some(summary),
            messages: vec![PromptMessage::new_text(
                PromptMessageRole::User,
                request_text,
            )],
        })
    }

    // Prompt "query": produces a single user message asking to search the codebase
    // for the `query` argument.
    #[prompt(
        name = "query",
        description = "Search the indexed codebase using semantic search"
    )]
    async fn query_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<Vec<PromptMessage>, McpError> {
        // A missing or non-string "query" is treated as the empty string.
        let search_terms = args
            .get("query")
            .and_then(serde_json::Value::as_str)
            .unwrap_or("");

        let message = PromptMessage::new_text(
            PromptMessageRole::User,
            format!("Please search the codebase for: {}", search_terms),
        );

        Ok(vec![message])
    }

    // Prompt "stats": produces a fixed user message asking for index statistics.
    #[prompt(
        name = "stats",
        description = "Get statistics about the indexed codebase"
    )]
    async fn stats_prompt(&self) -> Vec<PromptMessage> {
        let message = PromptMessage::new_text(
            PromptMessageRole::User,
            "Please get statistics about the indexed codebase.",
        );

        vec![message]
    }

    // Prompt "clear": produces a fixed user message asking to clear the index.
    #[prompt(
        name = "clear",
        description = "Clear all indexed data from the vector database"
    )]
    async fn clear_prompt(&self) -> Vec<PromptMessage> {
        let message = PromptMessage::new_text(
            PromptMessageRole::User,
            "Please clear all indexed data from the vector database.",
        );

        vec![message]
    }

    // Prompt "search": produces a single user message asking for an advanced search
    // for the `query` argument.
    #[prompt(
        name = "search",
        description = "Advanced search with filters (file type, language, path)"
    )]
    async fn search_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<Vec<PromptMessage>, McpError> {
        // A missing or non-string "query" is treated as the empty string.
        let search_terms = args
            .get("query")
            .and_then(serde_json::Value::as_str)
            .unwrap_or("");

        let message = PromptMessage::new_text(
            PromptMessageRole::User,
            format!("Please perform an advanced search for: {}", search_terms),
        );

        Ok(vec![message])
    }

    // Prompt "git-search": produces a single user message asking to search the git
    // history at the `path` argument for the `query` argument.
    #[prompt(
        name = "git-search",
        description = "Search git commit history using semantic search (automatically indexes commits on-demand)"
    )]
    async fn git_search_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<Vec<PromptMessage>, McpError> {
        // Reads a string argument, using `fallback` when it is missing or not a string.
        let text_arg = |key: &str, fallback: &'static str| -> &str {
            args.get(key)
                .and_then(serde_json::Value::as_str)
                .unwrap_or(fallback)
        };

        let search_terms = text_arg("query", "");
        let repo_path = text_arg("path", ".");

        let message = PromptMessage::new_text(
            PromptMessageRole::User,
            format!(
                "Please search git commit history at path '{}' for: {}. This will automatically index commits as needed.",
                repo_path, search_terms
            ),
        );

        Ok(vec![message])
    }

    // Prompt "definition": produces a single user message asking for the definition
    // of the symbol at the `file`, `line`, and `column` arguments.
    //
    // The MCP `prompts/get` schema defines prompt arguments as strings, so `line` and
    // `column` are accepted both as JSON numbers and as numeric strings (e.g. "42").
    // Missing or unparsable values fall back to line 1 and column 0; a missing or
    // non-string `file` becomes the empty string.
    #[prompt(
        name = "definition",
        description = "Find where a symbol is defined at a given file location"
    )]
    async fn definition_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<Vec<PromptMessage>, McpError> {
        // Reads a non-negative integer argument, tolerating string-encoded numbers.
        let numeric_arg = |key: &str, default: u64| -> u64 {
            args.get(key)
                .and_then(|value| {
                    value.as_u64().or_else(|| {
                        value
                            .as_str()
                            .and_then(|text| text.trim().parse::<u64>().ok())
                    })
                })
                .unwrap_or(default)
        };

        let file = args.get("file").and_then(|v| v.as_str()).unwrap_or("");
        let line = numeric_arg("line", 1);
        let column = numeric_arg("column", 0);

        Ok(vec![PromptMessage::new_text(
            PromptMessageRole::User,
            format!(
                "Please find the definition of the symbol at file '{}', line {}, column {}.",
                file, line, column
            ),
        )])
    }

    // Prompt "references": produces a single user message asking for all references
    // to the symbol at the `file`, `line`, and `column` arguments.
    //
    // The MCP `prompts/get` schema defines prompt arguments as strings, so `line` and
    // `column` are accepted both as JSON numbers and as numeric strings (e.g. "42").
    // Missing or unparsable values fall back to line 1 and column 0; a missing or
    // non-string `file` becomes the empty string.
    #[prompt(
        name = "references",
        description = "Find all references to a symbol at a given file location"
    )]
    async fn references_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<Vec<PromptMessage>, McpError> {
        // Reads a non-negative integer argument, tolerating string-encoded numbers.
        let numeric_arg = |key: &str, default: u64| -> u64 {
            args.get(key)
                .and_then(|value| {
                    value.as_u64().or_else(|| {
                        value
                            .as_str()
                            .and_then(|text| text.trim().parse::<u64>().ok())
                    })
                })
                .unwrap_or(default)
        };

        let file = args.get("file").and_then(|v| v.as_str()).unwrap_or("");
        let line = numeric_arg("line", 1);
        let column = numeric_arg("column", 0);

        Ok(vec![PromptMessage::new_text(
            PromptMessageRole::User,
            format!(
                "Please find all references to the symbol at file '{}', line {}, column {}.",
                file, line, column
            ),
        )])
    }

    // Prompt "callgraph": produces a single user message asking for the call graph
    // of the function at the `file`, `line`, and `column` arguments.
    //
    // The MCP `prompts/get` schema defines prompt arguments as strings, so `line` and
    // `column` are accepted both as JSON numbers and as numeric strings (e.g. "42").
    // Missing or unparsable values fall back to line 1 and column 0; a missing or
    // non-string `file` becomes the empty string.
    #[prompt(
        name = "callgraph",
        description = "Get the call graph (callers and callees) for a function at a given location"
    )]
    async fn callgraph_prompt(
        &self,
        Parameters(args): Parameters<serde_json::Value>,
    ) -> Result<Vec<PromptMessage>, McpError> {
        // Reads a non-negative integer argument, tolerating string-encoded numbers.
        let numeric_arg = |key: &str, default: u64| -> u64 {
            args.get(key)
                .and_then(|value| {
                    value.as_u64().or_else(|| {
                        value
                            .as_str()
                            .and_then(|text| text.trim().parse::<u64>().ok())
                    })
                })
                .unwrap_or(default)
        };

        let file = args.get("file").and_then(|v| v.as_str()).unwrap_or("");
        let line = numeric_arg("line", 1);
        let column = numeric_arg("column", 0);

        Ok(vec![PromptMessage::new_text(
            PromptMessageRole::User,
            format!(
                "Please get the call graph for the function at file '{}', line {}, column {}. Show what calls this function and what it calls.",
                file, line, column
            ),
        )])
    }
}

#[tool_handler(router = self.tool_router)]
#[prompt_handler]
impl ServerHandler for RagMcpServer {
    /// Describes this server: default protocol version, tools and prompts enabled,
    /// implementation metadata (name, title, crate version), and usage instructions.
    fn get_info(&self) -> ServerInfo {
        let capabilities = ServerCapabilities::builder()
            .enable_tools()
            .enable_prompts()
            .build();

        let server_info = Implementation {
            name: "project".into(),
            title: Some("Project RAG - Code Understanding with Semantic Search".into()),
            // Version is taken from Cargo at compile time.
            version: env!("CARGO_PKG_VERSION").into(),
            icons: None,
            website_url: None,
        };

        ServerInfo {
            protocol_version: ProtocolVersion::default(),
            capabilities,
            server_info,
            instructions: Some(
                "RAG-based codebase indexing and semantic search. \
                Use index_codebase to create embeddings (automatically performs full or incremental indexing), \
                query_codebase to search, and search_by_filters for advanced queries."
                    .into(),
            ),
        }
    }
}

impl RagMcpServer {
    /// Creates a default server and runs it over the stdio transport until the
    /// running service finishes.
    ///
    /// # Errors
    /// Returns an error if the server cannot be created (with the context
    /// "Failed to create MCP server"), if serving over the transport fails, or if
    /// waiting on the running service fails.
    pub async fn serve_stdio() -> Result<()> {
        tracing::info!("Starting RAG MCP server");

        let server = Self::new().await.context("Failed to create MCP server")?;

        let running = server.serve(rmcp::transport::io::stdio()).await?;
        running.waiting().await?;

        Ok(())
    }
}

#[cfg(test)]
mod tests;