Skip to main content

codesearch/mcp/
mod.rs

1//! MCP (Model Context Protocol) server for Claude Code integration
2//!
3//! Exposes codesearch's semantic search capabilities via the MCP protocol,
4//! allowing AI assistants like Claude to search codebases during conversations.
5
6pub mod types;
7
8use anyhow::Result;
9use rmcp::{
10    handler::server::router::tool::ToolRouter,
11    handler::server::wrapper::Parameters,
12    model::{CallToolResult, Content, ServerCapabilities, ServerInfo},
13    tool, tool_handler, tool_router, ErrorData as McpError, ServerHandler,
14};
15use std::path::PathBuf;
16use std::sync::{Arc, Mutex};
17use tokio_util::sync::CancellationToken;
18
19use crate::db_discovery::{find_best_database, find_databases};
20
/// Normalize a path string so that differently spelled paths compare equal.
///
/// The result has:
/// - leading `./` segments removed,
/// - a leading Windows verbatim prefix (`\\?\`) removed,
/// - every backslash converted to a forward slash.
///
/// The `./` strip is repeated *after* separator conversion so that the
/// Windows spelling `.\src\lib.rs` normalizes to `src/lib.rs`, the same as
/// `./src/lib.rs`. Without the second pass it would come out as
/// `./src/lib.rs` and fail to compare equal.
fn normalize_path_for_compare(path: &str) -> String {
    path.trim_start_matches("./")
        .trim_start_matches(r"\\?\")
        .replace('\\', "/")
        // Second pass: catches prefixes that only became `./` once the
        // backslashes were converted.
        .trim_start_matches("./")
        .to_string()
}
27use crate::embed::{EmbeddingService, ModelType};
28use crate::file::Language;
29use crate::fts::FtsStore;
30use crate::index::{IndexManager, SharedStores};
31use crate::rerank::{rrf_fusion, rrf_fusion_with_exact, EXACT_MATCH_RRF_K};
32use crate::search::{adapt_rrf_k, boost_kind, detect_identifiers, detect_structural_intent};
33use crate::vectordb::VectorStore;
34
35// Re-export types
36pub use types::*;
37
38/// Codesearch MCP service
39pub struct CodesearchService {
40    tool_router: ToolRouter<CodesearchService>,
41    db_path: PathBuf,
42    project_path: PathBuf,
43    model_type: ModelType,
44    dimensions: usize,
45    // Lazily initialized on first search
46    embedding_service: Mutex<Option<EmbeddingService>>,
47    // Shared stores for concurrent access (optional - only set when running with IndexManager)
48    shared_stores: Option<Arc<SharedStores>>,
49}
50
51impl std::fmt::Debug for CodesearchService {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        f.debug_struct("CodesearchService")
54            .field("db_path", &self.db_path)
55            .field("model_type", &self.model_type)
56            .field("dimensions", &self.dimensions)
57            .field("has_shared_stores", &self.shared_stores.is_some())
58            .finish()
59    }
60}
61
62// === Tool Router Implementation ===
63
64#[tool_router]
65impl CodesearchService {
66    /// Create a new CodesearchService (standalone mode - opens its own VectorStore)
67    #[allow(dead_code)] // Reserved for standalone MCP server mode
68    pub fn new(requested_path: Option<PathBuf>) -> Result<Self> {
69        Self::new_with_stores(requested_path, None)
70    }
71
72    /// Create a new CodesearchService with shared stores (for use with IndexManager)
73    pub fn new_with_stores(
74        requested_path: Option<PathBuf>,
75        shared_stores: Option<Arc<SharedStores>>,
76    ) -> Result<Self> {
77        // Find the best database to use
78        let db_info = find_best_database(requested_path.as_deref())?;
79
80        if db_info.is_none() {
81            return Err(anyhow::anyhow!(
82                "No database found in current directory, parent directories, or globally tracked repositories. \
83                 Run 'codesearch index' first to index the codebase."
84            ));
85        }
86
87        let db_info = db_info.unwrap();
88        let db_path = db_info.db_path;
89        let project_path = db_info.project_path;
90
91        // Read model metadata from database
92        let metadata_path = db_path.join("metadata.json");
93        let (model_type, dimensions) = if metadata_path.exists() {
94            let content = std::fs::read_to_string(&metadata_path)?;
95            let json: serde_json::Value = serde_json::from_str(&content)?;
96            let model_name = json
97                .get("model_short_name")
98                .and_then(|v| v.as_str())
99                .unwrap_or("minilm-l6");
100            let dims = json
101                .get("dimensions")
102                .and_then(|v| v.as_u64())
103                .unwrap_or(384) as usize;
104            let mt = ModelType::parse(model_name).unwrap_or_default();
105            (mt, dims)
106        } else {
107            (ModelType::default(), 384)
108        };
109
110        Ok(Self {
111            tool_router: Self::tool_router(),
112            db_path,
113            project_path,
114            model_type,
115            dimensions,
116            embedding_service: Mutex::new(None),
117            shared_stores,
118        })
119    }
120
121    /// Get or initialize the embedding service
122    fn get_embedding_service(&self) -> Result<std::sync::MutexGuard<'_, Option<EmbeddingService>>> {
123        let mut guard = self.embedding_service.lock().unwrap();
124        if guard.is_none() {
125            let cache_dir = crate::constants::get_global_models_cache_dir()?;
126            *guard = Some(EmbeddingService::with_cache_dir(
127                self.model_type,
128                Some(&cache_dir),
129            )?);
130        }
131        Ok(guard)
132    }
133
134    /// Check if database exists and return error if not
135    fn ensure_database_exists(&self) -> Result<(), String> {
136        if !self.db_path.exists() {
137            return Err(format!(
138                "❌ No index database found at: {}\n\n\
139                 ⚠️  IMPORTANT: This MCP server cannot index the codebase itself. Indexing takes 30-60 seconds and must be done manually.\n\n\
140                 To fix this, run the following command in your terminal:\n\
141                 $ cd {}\n\
142                 $ codesearch index\n\n\
143                 For more information about database locations, use the find_databases tool.",
144                self.db_path.display(),
145                self.project_path.display()
146            ));
147        }
148        Ok(())
149    }
150
151    #[tool(
152        description = "Search code semantically using natural language. Returns compact metadata by default (path, line numbers, kind, signature, score). Use the read tool with the returned line numbers to view actual code. Set compact=false only when you need full content inline. Use filter_path to narrow results to a specific directory."
153    )]
154    async fn semantic_search(
155        &self,
156        Parameters(request): Parameters<SemanticSearchRequest>,
157    ) -> Result<CallToolResult, McpError> {
158        let limit = request.limit.unwrap_or(10);
159        let compact = request.compact.unwrap_or(true);
160
161        tracing::debug!(
162            "MCP semantic_search: query='{}', limit={}, compact={}",
163            request.query,
164            limit,
165            compact
166        );
167
168        // Ensure database exists
169        if let Err(e) = self.ensure_database_exists() {
170            return Ok(CallToolResult::success(vec![Content::text(e)]));
171        }
172
173        // Get embedding service and embed query
174        // Note: We must drop the MutexGuard before any await points
175        tracing::debug!("MCP: Getting embedding service...");
176        let query_embedding = {
177            let mut service_guard = match self.get_embedding_service() {
178                Ok(g) => g,
179                Err(e) => {
180                    tracing::error!("MCP: Failed to get embedding service: {:?}", e);
181                    return Ok(CallToolResult::success(vec![Content::text(format!(
182                        "Error initializing embedding service: {}",
183                        e
184                    ))]));
185                }
186            };
187
188            let service = service_guard.as_mut().unwrap();
189            tracing::debug!("MCP: Embedding query...");
190            match service.embed_query(&request.query) {
191                Ok(e) => e,
192                Err(e) => {
193                    tracing::error!("MCP: Failed to embed query: {:?}", e);
194                    return Ok(CallToolResult::success(vec![Content::text(format!(
195                        "Error embedding query: {}",
196                        e
197                    ))]));
198                }
199            }
200            // service_guard is dropped here, before any await
201        };
202
203        // Search using shared stores if available, otherwise open a new store
204        tracing::debug!(
205            "MCP: Searching with {} dimensions...",
206            query_embedding.len()
207        );
208        let vector_results = if let Some(ref stores) = self.shared_stores {
209            // Use shared store with read lock
210            let store = stores.vector_store.read().await;
211            match store.search(&query_embedding, limit * 3) {
212                Ok(r) => r,
213                Err(e) => {
214                    tracing::error!("MCP: Search failed (shared store): {:?}", e);
215                    return Ok(CallToolResult::success(vec![Content::text(format!(
216                        "Error searching: {}",
217                        e
218                    ))]));
219                }
220            }
221        } else {
222            // Fallback: open a new store (standalone mode)
223            tracing::debug!("MCP: Opening vector store (standalone mode)...");
224            let store = match VectorStore::new(&self.db_path, self.dimensions) {
225                Ok(s) => s,
226                Err(e) => {
227                    tracing::error!("MCP: Failed to open vector store: {:?}", e);
228                    return Ok(CallToolResult::success(vec![Content::text(format!(
229                        "Error opening database: {}. The database may be corrupted or not indexed yet.",
230                        e
231                    ))]));
232                }
233            };
234            match store.search(&query_embedding, limit * 3) {
235                Ok(r) => r,
236                Err(e) => {
237                    tracing::error!("MCP: Search failed: {:?}", e);
238                    return Ok(CallToolResult::success(vec![Content::text(format!(
239                        "Error searching: {}",
240                        e
241                    ))]));
242                }
243            }
244        };
245
246        tracing::debug!("MCP: Found {} vector results", vector_results.len());
247
248        // --- Hybrid search with all improvements ---
249
250        // Detect identifiers and structural intent from query
251        let identifiers = detect_identifiers(&request.query);
252        let structural_intent = detect_structural_intent(&request.query);
253        let (vector_k, fts_k) = adapt_rrf_k(&request.query);
254
255        tracing::debug!(
256            "MCP: Query analysis - identifiers: {:?}, structural_intent: {:?}, rrf_k: ({}, {})",
257            identifiers,
258            structural_intent,
259            vector_k,
260            fts_k
261        );
262
263        // Perform FTS search and fusion
264        let mut results = match FtsStore::new(&self.db_path) {
265            Ok(fts_store) => {
266                // FTS search
267                let fts_results = fts_store
268                    .search(&request.query, limit * 3, structural_intent.clone())
269                    .unwrap_or_default();
270
271                let fused = if identifiers.is_empty() {
272                    // No identifiers: standard RRF fusion
273                    rrf_fusion(&vector_results, &fts_results, vector_k as f32)
274                } else {
275                    // Has identifiers: also do exact search per identifier
276                    let mut all_exact: Vec<crate::fts::FtsResult> = Vec::new();
277                    for ident in &identifiers {
278                        if let Ok(exact) =
279                            fts_store.search_exact(ident, limit * 2, structural_intent.clone())
280                        {
281                            for r in exact {
282                                if !all_exact.iter().any(|e| e.chunk_id == r.chunk_id) {
283                                    all_exact.push(r);
284                                }
285                            }
286                        }
287                    }
288
289                    tracing::debug!(
290                        "MCP: FTS found {} results, exact found {} results",
291                        fts_results.len(),
292                        all_exact.len()
293                    );
294
295                    rrf_fusion_with_exact(
296                        &vector_results,
297                        &fts_results,
298                        &all_exact,
299                        vector_k as f32,
300                        fts_k as f32,
301                        EXACT_MATCH_RRF_K,
302                    )
303                };
304
305                // Map FusedResult back to SearchResult
306                let chunk_to_result: std::collections::HashMap<
307                    u32,
308                    &crate::vectordb::SearchResult,
309                > = vector_results.iter().map(|r| (r.id, r)).collect();
310
311                let mut mapped: Vec<crate::vectordb::SearchResult> = Vec::new();
312                for f in fused.into_iter().take(limit) {
313                    if let Some(result) = chunk_to_result.get(&f.chunk_id) {
314                        let mut r = (*result).clone();
315                        r.score = f.rrf_score;
316                        mapped.push(r);
317                    }
318                }
319                mapped
320            }
321            Err(e) => {
322                // FTS unavailable, fall back to vector-only results
323                tracing::warn!("MCP: FTS store unavailable, using vector-only: {:?}", e);
324                vector_results.into_iter().take(limit).collect()
325            }
326        };
327
328        // Apply language boost (improvement 2)
329        if let Some((_, _, Some(primary_lang))) = crate::search::read_metadata(&self.db_path) {
330            for result in &mut results {
331                let file_lang = format!(
332                    "{:?}",
333                    Language::from_path(std::path::Path::new(&result.path))
334                );
335                if file_lang.to_lowercase() == primary_lang.to_lowercase() {
336                    result.score *= 1.2;
337                }
338            }
339            results.sort_by(|a, b| {
340                b.score
341                    .partial_cmp(&a.score)
342                    .unwrap_or(std::cmp::Ordering::Equal)
343            });
344        }
345
346        // Apply kind boost (improvement 3)
347        if let Some(target_kind) = structural_intent {
348            boost_kind(&mut results, target_kind);
349        }
350
351        tracing::debug!("MCP: Final {} results after hybrid search", results.len());
352
353        if results.is_empty() {
354            return Ok(CallToolResult::success(vec![Content::text(
355                "No results found for the query. Try rephrasing your query or using broader terms.",
356            )]));
357        }
358
359        // Convert to response format, applying compact mode and filter_path
360        let items: Vec<SearchResultItem> = results
361            .into_iter()
362            .filter(|r| {
363                // Apply filter_path if specified
364                if let Some(ref fp) = request.filter_path {
365                    let normalized_path = r.path.trim_start_matches("./");
366                    let normalized_filter = fp.trim_start_matches("./").trim_end_matches('/');
367                    normalized_path.starts_with(normalized_filter)
368                } else {
369                    true
370                }
371            })
372            .map(|r| SearchResultItem {
373                path: r.path,
374                start_line: r.start_line,
375                end_line: r.end_line,
376                kind: r.kind,
377                score: r.score,
378                signature: r.signature,
379                content: if compact { None } else { Some(r.content) },
380                context_prev: if compact { None } else { r.context_prev },
381                context_next: if compact { None } else { r.context_next },
382            })
383            .collect();
384
385        let json = serde_json::to_string(&items).unwrap_or_else(|_| "[]".to_string());
386        Ok(CallToolResult::success(vec![Content::text(json)]))
387    }
388
389    #[tool(
390        description = "Get all indexed chunks from a specific file. Returns compact metadata by default (path, line numbers, kind, signature). Useful for understanding file structure before using the read tool for specific sections."
391    )]
392    async fn get_file_chunks(
393        &self,
394        Parameters(request): Parameters<GetFileChunksRequest>,
395    ) -> Result<CallToolResult, McpError> {
396        let compact = request.compact.unwrap_or(true);
397        // Ensure database exists
398        if let Err(e) = self.ensure_database_exists() {
399            return Ok(CallToolResult::success(vec![Content::text(e)]));
400        }
401
402        // Get chunks using shared stores if available
403        let file_chunks = if let Some(ref stores) = self.shared_stores {
404            let store = stores.vector_store.read().await;
405
406            // Collect chunks for the requested file using LMDB iteration
407            // (avoids missing chunks with high IDs after delete+insert cycles)
408            let mut file_chunks: Vec<SearchResultItem> = Vec::new();
409            let all = match store.all_chunks() {
410                Ok(c) => c,
411                Err(e) => {
412                    return Ok(CallToolResult::success(vec![Content::text(format!(
413                        "Error reading chunks: {}",
414                        e
415                    ))]));
416                }
417            };
418            for (_id, chunk) in all {
419                // Normalize paths for comparison: strip UNC, normalize slashes
420                let chunk_norm = normalize_path_for_compare(&chunk.path);
421                let project_norm = normalize_path_for_compare(&self.project_path.to_string_lossy());
422                let req_norm = normalize_path_for_compare(&request.path);
423
424                // Make chunk path relative by stripping project path prefix
425                let chunk_rel = if chunk_norm.starts_with(&project_norm) {
426                    chunk_norm[project_norm.len()..]
427                        .trim_start_matches('/')
428                        .to_string()
429                } else {
430                    chunk_norm.clone()
431                };
432
433                // Match: exact, ends_with (for subdirectory repos), or raw paths
434                if chunk_rel == req_norm
435                    || chunk_rel.ends_with(&format!("/{}", req_norm))
436                    || req_norm.ends_with(&format!("/{}", chunk_rel))
437                    || chunk.path == request.path
438                {
439                    file_chunks.push(SearchResultItem {
440                        path: chunk.path,
441                        start_line: chunk.start_line,
442                        end_line: chunk.end_line,
443                        kind: chunk.kind,
444                        score: 1.0,
445                        signature: chunk.signature,
446                        content: if compact { None } else { Some(chunk.content) },
447                        context_prev: if compact { None } else { chunk.context_prev },
448                        context_next: if compact { None } else { chunk.context_next },
449                    });
450                }
451            }
452            file_chunks
453        } else {
454            // Fallback: open a new store (standalone mode)
455            let store = match VectorStore::new(&self.db_path, self.dimensions) {
456                Ok(s) => s,
457                Err(e) => {
458                    return Ok(CallToolResult::success(vec![Content::text(format!(
459                        "Error opening database: {}",
460                        e
461                    ))]));
462                }
463            };
464
465            // Collect chunks for the requested file using LMDB iteration
466            // (avoids missing chunks with high IDs after delete+insert cycles)
467            let mut file_chunks: Vec<SearchResultItem> = Vec::new();
468            let all = match store.all_chunks() {
469                Ok(c) => c,
470                Err(e) => {
471                    return Ok(CallToolResult::success(vec![Content::text(format!(
472                        "Error reading chunks: {}",
473                        e
474                    ))]));
475                }
476            };
477            for (_id, chunk) in all {
478                // Normalize paths for comparison: strip UNC, normalize slashes
479                let chunk_norm = normalize_path_for_compare(&chunk.path);
480                let project_norm = normalize_path_for_compare(&self.project_path.to_string_lossy());
481                let req_norm = normalize_path_for_compare(&request.path);
482
483                // Make chunk path relative by stripping project path prefix
484                let chunk_rel = if chunk_norm.starts_with(&project_norm) {
485                    chunk_norm[project_norm.len()..]
486                        .trim_start_matches('/')
487                        .to_string()
488                } else {
489                    chunk_norm.clone()
490                };
491
492                // Match: exact, ends_with (for subdirectory repos), or raw paths
493                if chunk_rel == req_norm
494                    || chunk_rel.ends_with(&format!("/{}", req_norm))
495                    || req_norm.ends_with(&format!("/{}", chunk_rel))
496                    || chunk.path == request.path
497                {
498                    file_chunks.push(SearchResultItem {
499                        path: chunk.path,
500                        start_line: chunk.start_line,
501                        end_line: chunk.end_line,
502                        kind: chunk.kind,
503                        score: 1.0,
504                        signature: chunk.signature,
505                        content: if compact { None } else { Some(chunk.content) },
506                        context_prev: if compact { None } else { chunk.context_prev },
507                        context_next: if compact { None } else { chunk.context_next },
508                    });
509                }
510            }
511            file_chunks
512        };
513
514        // Sort by start line
515        let mut file_chunks = file_chunks;
516        file_chunks.sort_by_key(|c| c.start_line);
517
518        if file_chunks.is_empty() {
519            return Ok(CallToolResult::success(vec![Content::text(format!(
520                "No chunks found for file: {}. The file may not be indexed or the path may be incorrect.",
521                request.path
522            ))]));
523        }
524
525        let json = serde_json::to_string(&file_chunks).unwrap_or_else(|_| "[]".to_string());
526        Ok(CallToolResult::success(vec![Content::text(json)]))
527    }
528
529    #[tool(
530        description = "Find all references/usages of a symbol (function, class, method, variable) across the codebase. USE THIS INSTEAD OF GREP when you need to find where a symbol is used — for refactoring, impact analysis, or understanding call sites. Returns compact list of file paths, line numbers, and containing function signatures."
531    )]
532    async fn find_references(
533        &self,
534        Parameters(request): Parameters<FindReferencesRequest>,
535    ) -> Result<CallToolResult, McpError> {
536        let limit = request.limit.unwrap_or(20);
537
538        tracing::debug!(
539            "MCP find_references: symbol='{}', limit={}",
540            request.symbol,
541            limit
542        );
543
544        // Ensure database exists
545        if let Err(e) = self.ensure_database_exists() {
546            return Ok(CallToolResult::success(vec![Content::text(e)]));
547        }
548
549        // Open FTS store for full-text search on the symbol name
550        let fts_store = match FtsStore::new(&self.db_path) {
551            Ok(s) => s,
552            Err(e) => {
553                return Ok(CallToolResult::success(vec![Content::text(format!(
554                    "Error opening FTS store: {}. Try re-indexing with 'codesearch index --force'.",
555                    e
556                ))]));
557            }
558        };
559
560        // Search FTS for the symbol — returns chunk_id + score
561        let fts_results = match fts_store.search(&request.symbol, limit * 2, None) {
562            Ok(r) => r,
563            Err(e) => {
564                return Ok(CallToolResult::success(vec![Content::text(format!(
565                    "Error searching for references: {}",
566                    e
567                ))]));
568            }
569        };
570
571        if fts_results.is_empty() {
572            return Ok(CallToolResult::success(vec![Content::text(format!(
573                "No references found for '{}'. The symbol may not be indexed or try a different name.",
574                request.symbol
575            ))]));
576        }
577
578        // Resolve chunk metadata from VectorStore using chunk_ids
579        let items: Vec<ReferenceItem> = if let Some(ref stores) = self.shared_stores {
580            let store = stores.vector_store.read().await;
581            fts_results
582                .iter()
583                .filter_map(|fts_result| {
584                    if let Ok(Some(chunk)) = store.get_chunk(fts_result.chunk_id) {
585                        Some(ReferenceItem {
586                            path: chunk.path,
587                            line: chunk.start_line,
588                            kind: chunk.kind,
589                            signature: chunk.signature,
590                            score: fts_result.score,
591                        })
592                    } else {
593                        None
594                    }
595                })
596                .take(limit)
597                .collect()
598        } else {
599            // Standalone mode — open a new store
600            let store = match VectorStore::new(&self.db_path, self.dimensions) {
601                Ok(s) => s,
602                Err(e) => {
603                    return Ok(CallToolResult::success(vec![Content::text(format!(
604                        "Error opening database: {}",
605                        e
606                    ))]));
607                }
608            };
609            fts_results
610                .iter()
611                .filter_map(|fts_result| {
612                    if let Ok(Some(chunk)) = store.get_chunk(fts_result.chunk_id) {
613                        Some(ReferenceItem {
614                            path: chunk.path,
615                            line: chunk.start_line,
616                            kind: chunk.kind,
617                            signature: chunk.signature,
618                            score: fts_result.score,
619                        })
620                    } else {
621                        None
622                    }
623                })
624                .take(limit)
625                .collect()
626        };
627
628        let json = serde_json::to_string(&items).unwrap_or_else(|_| "[]".to_string());
629        Ok(CallToolResult::success(vec![Content::text(json)]))
630    }
631
632    #[tool(
633        description = "Get the status of the semantic search index including model info and statistics. Check this before searching to verify the index is ready."
634    )]
635    async fn index_status(&self) -> Result<CallToolResult, McpError> {
636        let indexed = self.db_path.exists();
637
638        if !indexed {
639            let response = IndexStatusResponse {
640                indexed: false,
641                total_chunks: 0,
642                total_files: 0,
643                model: "none".to_string(),
644                dimensions: 0,
645                max_chunk_id: 0,
646                db_path: self.db_path.display().to_string(),
647                project_path: self.project_path.display().to_string(),
648                error_message: Some(
649                    "No index found. Run 'codesearch index' first to create the index.".to_string(),
650                ),
651            };
652            let json = serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
653            return Ok(CallToolResult::success(vec![Content::text(json)]));
654        }
655
656        // Get stats using shared stores if available
657        let stats = if let Some(ref stores) = self.shared_stores {
658            let store = stores.vector_store.read().await;
659            match store.stats() {
660                Ok(s) => s,
661                Err(e) => {
662                    let response = IndexStatusResponse {
663                        indexed: false,
664                        total_chunks: 0,
665                        total_files: 0,
666                        model: self.model_type.short_name().to_string(),
667                        dimensions: 0,
668                        max_chunk_id: 0,
669                        db_path: self.db_path.display().to_string(),
670                        project_path: self.project_path.display().to_string(),
671                        error_message: Some(format!("Error getting stats: {}", e)),
672                    };
673                    let json =
674                        serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
675                    return Ok(CallToolResult::success(vec![Content::text(json)]));
676                }
677            }
678        } else {
679            // Fallback: open a new store (standalone mode)
680            let store = match VectorStore::new(&self.db_path, self.dimensions) {
681                Ok(s) => s,
682                Err(e) => {
683                    let response = IndexStatusResponse {
684                        indexed: false,
685                        total_chunks: 0,
686                        total_files: 0,
687                        model: self.model_type.short_name().to_string(),
688                        dimensions: 0,
689                        max_chunk_id: 0,
690                        db_path: self.db_path.display().to_string(),
691                        project_path: self.project_path.display().to_string(),
692                        error_message: Some(format!("Error getting stats: {}", e)),
693                    };
694                    let json =
695                        serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
696                    return Ok(CallToolResult::success(vec![Content::text(json)]));
697                }
698            };
699
700            match store.stats() {
701                Ok(s) => s,
702                Err(e) => {
703                    let response = IndexStatusResponse {
704                        indexed: false,
705                        total_chunks: 0,
706                        total_files: 0,
707                        model: self.model_type.short_name().to_string(),
708                        dimensions: 0,
709                        max_chunk_id: 0,
710                        db_path: self.db_path.display().to_string(),
711                        project_path: self.project_path.display().to_string(),
712                        error_message: Some(format!("Error getting stats: {}", e)),
713                    };
714                    let json =
715                        serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
716                    return Ok(CallToolResult::success(vec![Content::text(json)]));
717                }
718            }
719        };
720
721        let response = IndexStatusResponse {
722            indexed: stats.indexed,
723            total_chunks: stats.total_chunks,
724            total_files: stats.total_files,
725            model: self.model_type.short_name().to_string(),
726            dimensions: stats.dimensions,
727            max_chunk_id: stats.max_chunk_id,
728            db_path: self.db_path.display().to_string(),
729            project_path: self.project_path.display().to_string(),
730            error_message: None,
731        };
732
733        let json = serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
734        Ok(CallToolResult::success(vec![Content::text(json)]))
735    }
736
737    #[tool(
738        description = "Find all available codesearch databases in the current directory, parent directories, and globally tracked repositories. Use this to discover which databases are available for searching."
739    )]
740    async fn find_databases(&self) -> Result<CallToolResult, McpError> {
741        let current_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
742        let dbs = find_databases().unwrap_or_default();
743
744        let mut response_dbs = Vec::new();
745
746        for db_info in &dbs {
747            // Get stats for this database
748            let (total_chunks, total_files, model) = if db_info.db_path.exists() {
749                // Try to read model from metadata
750                let metadata_path = db_info.db_path.join("metadata.json");
751                let model_name = if metadata_path.exists() {
752                    if let Ok(content) = std::fs::read_to_string(&metadata_path) {
753                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
754                            json.get("model_short_name")
755                                .and_then(|v| v.as_str())
756                                .unwrap_or("unknown")
757                                .to_string()
758                        } else {
759                            "unknown".to_string()
760                        }
761                    } else {
762                        "unknown".to_string()
763                    }
764                } else {
765                    "unknown".to_string()
766                };
767
768                // Try to get stats - need to infer dimensions from model name
769                let dims = match model_name.as_str() {
770                    "minilm-l6" | "minilm-l6-q" | "minilm-l12" | "minilm-l12-q" | "bge-small"
771                    | "bge-small-q" | "e5-multilingual" => 384,
772                    "bge-base" | "jina-code" | "nomic-v1.5" => 768,
773                    "bge-large" | "mxbai-large" => 1024,
774                    _ => 384, // default
775                };
776
777                // Try to get stats
778                if let Ok(store) = VectorStore::new(&db_info.db_path, dims) {
779                    if let Ok(stats) = store.stats() {
780                        (stats.total_chunks, stats.total_files, model_name)
781                    } else {
782                        (0, 0, model_name)
783                    }
784                } else {
785                    (0, 0, model_name)
786                }
787            } else {
788                (0, 0, "not found".to_string())
789            };
790
791            response_dbs.push(DatabaseInfoResponse {
792                database_path: db_info.db_path.display().to_string(),
793                project_path: db_info.project_path.display().to_string(),
794                is_current_directory: db_info.is_current,
795                depth_from_current: db_info.depth,
796                total_chunks,
797                total_files,
798                model,
799            });
800        }
801
802        // Build message based on what was found
803        let message = if dbs.is_empty() {
804            "❌ No databases found. Run 'codesearch index' to create an index.".to_string()
805        } else if dbs.iter().any(|d| d.is_current) {
806            format!(
807                "✅ Found {} database(s). Current directory has an index.",
808                dbs.len()
809            )
810        } else {
811            format!("⚠️  Found {} database(s) in parent/global directories, but not in current directory.", dbs.len())
812        };
813
814        let response = FindDatabasesResponse {
815            databases: response_dbs,
816            message,
817            current_directory: current_dir.display().to_string(),
818        };
819
820        let json = serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
821        Ok(CallToolResult::success(vec![Content::text(json)]))
822    }
823}
824
825// === Server Handler Implementation ===
826
827#[tool_handler]
828impl ServerHandler for CodesearchService {
829    fn get_info(&self) -> ServerInfo {
830        let current_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
831        let db_exists = self.db_path.exists();
832
833        ServerInfo {
834            capabilities: ServerCapabilities::builder().enable_tools().build(),
835            server_info: rmcp::model::Implementation {
836                name: "codesearch".to_string(),
837                version: env!("CARGO_PKG_VERSION").to_string(),
838                title: None,
839                icons: None,
840                website_url: None,
841            },
842            instructions: Some(format!(
843                r#"codesearch - Semantic Code Search MCP Server
844
845codesearch provides fast, local semantic code search using natural language queries.
846Search your codebase by meaning, not just by keywords.
847
848⚠️  IMPORTANT: This MCP server CANNOT index codebases. Indexing must be done manually.
849Indexing takes 30-60 seconds and should be done via the CLI: `codesearch index`
850
851AVAILABLE TOOLS:
852
8531. find_databases()
854   Find all available databases in current directory, parent directories, and globally.
855   Use this FIRST to discover which databases are available.
856   Returns: List of databases with paths, stats, and model info.
857
8582. index_status()
859   Check if the current index is ready for searching.
860   Use this AFTER find_databases() to verify the database is accessible.
861   Returns: Index status, stats, model info, and any error messages.
862
8633. semantic_search(query, limit=10, compact=true, filter_path=null)
864   Search the codebase using natural language queries.
865   By default returns COMPACT results (path, line numbers, kind, signature, score only).
866   Set compact=false to include full code content (use sparingly - high token cost).
867   Use filter_path to narrow results to a specific directory (e.g., "src/api/").
868   Query examples:
869     - "where do we handle user authentication?"
870     - "how is error logging implemented?"
871     - "functions that process payment data"
872   Returns: Array of matches with metadata. Use read tool to fetch actual code.
873
8744. find_references(symbol, limit=50)
875   Find all usages/call sites of a function, method, class, or type across the codebase.
876   ⚠️  USE THIS instead of grep when you need to find where a symbol is used.
877   Essential for refactoring — shows all locations that need to change.
878   Examples:
879     - find_references("authenticate") - Find all calls to authenticate()
880     - find_references("UserService") - Find all usages of UserService
881     - find_references("handleRequest") - Find all call sites
882   Returns: Compact list of file paths, line numbers, kind, and score.
883
8845. get_file_chunks(path, compact=true)
885   Get all indexed chunks from a specific file.
886   Useful for understanding the structure of a file (functions, classes, methods).
887   By default returns COMPACT metadata only. Set compact=false for full content.
888   Returns: Chunks with metadata. Use read tool to fetch actual code.
889
890TOKEN-EFFICIENT WORKFLOW (IMPORTANT):
891
892All tools return compact metadata by default to minimize token usage.
893Use the read tool to fetch actual code content only for the specific
894lines you need. NEVER use grep for finding symbol usages — use
895find_references() instead.
896
897RECOMMENDED WORKFLOW:
898
899Step 1: Discover
900  find_databases() → index_status()
901
902Step 2: Search (compact — returns metadata only)
903  semantic_search("authentication handler")
904
905Step 3: Find related code (compact — returns locations only)
906  find_references("authenticate")
907
908Step 4: Read only what you need (targeted)
909  Use read tool with exact file path + line numbers from steps 2-3
910
911REFACTORING WORKFLOW:
912
9131. semantic_search("the function to refactor") → find the definition
9142. find_references("functionName") → find ALL call sites
9153. Read each call site with read tool → understand usage patterns
9164. Make changes to definition + all call sites
917
918⚠️  NEVER use grep to find symbol references. Always use find_references().
919    grep is only for exact string matching in non-indexed files.
920
921USAGE PATTERNS:
922
923Understanding a New Codebase:
924  1. find_databases() → index_status()
925  2. semantic_search("main application entry point")
926  3. semantic_search("error handling strategy")
927  4. get_file_chunks("src/main.rs") → see file structure
928
929Finding Implementation Patterns:
930  - semantic_search("how are API endpoints defined?")
931  - semantic_search("database model definitions")
932  - get_file_chunks("src/models/user.rs") → see structure, read for details
933
934Debugging and Analysis:
935  - semantic_search("error handling for database operations")
936  - find_references("handleError") → find all error handling sites
937
938BEST PRACTICES:
939
940✓ Always call find_databases() first to discover available indexes
941✓ Check index_status() before searching to verify the database is ready
942✓ Use natural language queries describing concepts, not exact terms
943✓ Use find_references() for refactoring — NOT grep
944✓ Use filter_path to narrow searches to specific directories
945✓ Let compact mode save tokens — read specific lines only when needed
946✓ Start with broader queries, then narrow down
947
948✗ Never attempt to index from this MCP server - use CLI instead
949✗ Never use grep to find symbol usages — use find_references() instead
950✗ Avoid short, vague queries like "auth" or "db"
951✗ Don't use compact=false unless you specifically need full code content
952✗ Don't search in subfolders expecting a separate index - indexes are project-wide
953
954DATABASE LOCATIONS:
955
956Priority order for database selection:
9571. Current directory (.codesearch.db/)
9582. Parent directories (up to 5 levels)
9593. Globally tracked repositories (~/.codesearch/repos.json)
960
961Current project: {project}
962Current database: {db}
963Database exists: {exists}
964Current directory: {cwd}
965
966ERROR HANDLING:
967
968If you get "No index found" errors:
9691. Call find_databases() to see what's available
9702. Check if you're in the right directory
9713. Verify the user has run 'codesearch index'
972
973If search returns poor results:
9741. The index may be stale - ask user to re-run 'codesearch index'
9752. Try different query phrasing
9763. Check index_status() for any errors
977
978SETUP:
979
980To create an index, the USER must run (not the agent):
981  $ cd /path/to/project
982  $ codesearch index
983
984Indexing takes 30-60 seconds and cannot be done from the MCP server.
985
986For detailed documentation, visit: https://github.com/flupkede/codesearch
987
988Model: {model}
989Dimensions: {dims}
990"#,
991                project = self.project_path.display(),
992                db = self.db_path.display(),
993                exists = if db_exists { "✅ Yes" } else { "❌ No" },
994                cwd = current_dir.display(),
995                model = self.model_type.short_name(),
996                dims = self.dimensions
997            )),
998            ..Default::default()
999        }
1000    }
1001}
1002
1003// === Server Entry Point ===
1004
1005/// Run the MCP server using stdio transport with file watching for live index updates.
1006///
1007/// # Multi-instance Support
1008///
1009/// When another instance is already running with write access to the same database,
1010/// this server will automatically start in **readonly mode**:
1011/// - Searches work normally
1012/// - No file watching (index won't auto-update)
1013/// - No incremental refresh
1014///
1015/// This allows multiple terminal windows to use codesearch simultaneously.
1016pub async fn run_mcp_server(path: Option<PathBuf>, cancel_token: CancellationToken) -> Result<()> {
1017    use rmcp::{transport::stdio, ServiceExt};
1018
1019    tracing::info!("🚀 Starting codesearch MCP server");
1020
1021    // Use database discovery to find the best database
1022    let db_info = find_best_database(path.as_deref())?;
1023
1024    if db_info.is_none() {
1025        return Err(anyhow::anyhow!(
1026            "No database found in current directory, parent directories, or globally tracked repositories. \
1027             Run 'codesearch index' first to index the codebase."
1028        ));
1029    }
1030
1031    let db_info = db_info.unwrap();
1032    let project_path = db_info.project_path.clone();
1033    let db_path = db_info.db_path.clone();
1034
1035    tracing::info!("📂 Project: {}", project_path.display());
1036    tracing::info!("💾 Database: {}", db_path.display());
1037
1038    // Read model metadata to get dimensions
1039    let metadata_path = db_path.join("metadata.json");
1040    let dimensions = if metadata_path.exists() {
1041        let content = std::fs::read_to_string(&metadata_path)?;
1042        let json: serde_json::Value = serde_json::from_str(&content)?;
1043        json.get("dimensions")
1044            .and_then(|v| v.as_u64())
1045            .unwrap_or(384) as usize
1046    } else {
1047        384
1048    };
1049
1050    // Always open in readonly mode — the `serve` daemon is responsible for
1051    // indexing and file watching. Running refresh + FSEvents watcher inside
1052    // the MCP process causes a hot loop on large codebases (900%+ CPU)
1053    // because refresh reads trigger filesystem events that trigger more
1054    // refreshes indefinitely.
1055    tracing::info!("📦 Creating shared stores (readonly)...");
1056    let shared_stores = SharedStores::new_readonly(&db_path, dimensions)?;
1057    let shared_stores = Arc::new(shared_stores);
1058    let is_readonly = true;
1059
1060    // Create MCP service with shared stores (ready immediately)
1061    let service = CodesearchService::new_with_stores(
1062        Some(project_path.clone()),
1063        Some(shared_stores.clone()),
1064    )?;
1065
1066    tracing::info!("🧠 Model: {}", service.model_type.name());
1067
1068    // START MCP SERVER NOW - fixes timeout!
1069    tracing::info!(
1070        "🚀 Starting MCP server{}...",
1071        if is_readonly { " (readonly)" } else { "" }
1072    );
1073    let server = service.serve(stdio()).await?;
1074
1075    tracing::info!("MCP server ready. Waiting for requests...");
1076
1077    // Only run background tasks if we have write access
1078    if !is_readonly {
1079        // Create IndexManager with shared stores (skip initial refresh - do in background)
1080        tracing::info!("🔍 Initializing index manager...");
1081        let index_manager =
1082            IndexManager::new_without_refresh(&project_path, shared_stores.clone()).await?;
1083
1084        // Background: refresh FIRST, then file watcher (sequential, not concurrent)
1085        // Both write to SharedStores, so they must not run concurrently
1086        let project_path_clone = project_path.clone();
1087        let db_path_clone = db_path.clone();
1088        let shared_stores_clone = shared_stores.clone();
1089        let index_manager_arc = Arc::new(index_manager);
1090        let bg_cancel_token = cancel_token.clone();
1091        tokio::spawn(async move {
1092            // Step 0: Pre-start FSW to collect file change events during refresh
1093            // This ensures changes made while the refresh is running are not missed
1094            if let Err(e) = index_manager_arc.start_watching().await {
1095                tracing::warn!("⚠️ Could not pre-start file watcher: {}", e);
1096            }
1097
1098            // Step 1: Run initial refresh (writes to stores)
1099            tracing::info!("🔄 Starting background incremental refresh...");
1100            match IndexManager::perform_incremental_refresh_with_stores(
1101                &project_path_clone,
1102                &db_path_clone,
1103                &shared_stores_clone,
1104            )
1105            .await
1106            {
1107                Ok(_) => {
1108                    tracing::info!("✅ Background incremental refresh completed");
1109
1110                    // Check if shutdown was requested during refresh
1111                    if bg_cancel_token.is_cancelled() {
1112                        tracing::info!("🛑 Shutdown requested, skipping file watcher startup");
1113                        return;
1114                    }
1115
1116                    // Step 2: AFTER refresh completes, start file watcher (also writes to stores)
1117                    tracing::info!("👀 Starting file watcher...");
1118                    if let Err(e) = index_manager_arc.start_file_watcher(bg_cancel_token).await {
1119                        tracing::error!("❌ Failed to start file watcher: {}", e);
1120                    } else {
1121                        tracing::info!(
1122                            "✅ File watcher active - index will auto-update on file changes"
1123                        );
1124                    }
1125                }
1126                Err(e) => {
1127                    tracing::error!("❌ Background incremental refresh failed: {}", e);
1128                }
1129            }
1130        });
1131
1132        // Start periodic log cleanup task
1133        let db_path_for_cleanup = db_path.clone();
1134        let cleanup_cancel_token = cancel_token.clone();
1135        tokio::spawn(async move {
1136            use crate::logger::{cleanup_old_logs, LogRotationConfig};
1137
1138            // Run initial cleanup on startup
1139            let rotation_config = LogRotationConfig::from_env();
1140            tracing::info!("🧹 Running initial log cleanup...");
1141            if let Err(e) = cleanup_old_logs(&db_path_for_cleanup, &rotation_config) {
1142                tracing::warn!("Initial log cleanup failed: {}", e);
1143            }
1144
1145            // Start periodic cleanup task (every 24 hours by default)
1146            crate::logger::start_cleanup_task(
1147                db_path_for_cleanup.clone(),
1148                rotation_config,
1149                cleanup_cancel_token,
1150            );
1151        });
1152    } else {
1153        tracing::info!("📖 Readonly mode: skipping background refresh and file watcher");
1154    }
1155
1156    // Wait for shutdown: either MCP transport closes or cancellation token fires
1157    tokio::select! {
1158        result = server.waiting() => {
1159            tracing::info!("MCP server transport closed");
1160            result?;
1161        }
1162        _ = cancel_token.cancelled() => {
1163            tracing::info!("🛑 Shutdown signal received, stopping MCP server...");
1164        }
1165    }
1166
1167    tracing::info!("✅ MCP server shut down cleanly");
1168    Ok(())
1169}