1pub mod types;
7
8use anyhow::Result;
9use rmcp::{
10 handler::server::router::tool::ToolRouter,
11 handler::server::wrapper::Parameters,
12 model::{CallToolResult, Content, ServerCapabilities, ServerInfo},
13 tool, tool_handler, tool_router, ErrorData as McpError, ServerHandler,
14};
15use std::path::PathBuf;
16use std::sync::{Arc, Mutex};
17use tokio_util::sync::CancellationToken;
18
19use crate::db_discovery::{find_best_database, find_databases};
20
/// Canonicalizes a path string so that differently spelled paths compare equal.
///
/// Steps, in order:
/// 1. drop every leading `./` segment,
/// 2. drop every leading Windows verbatim prefix (`\\?\`),
/// 3. turn each backslash into a forward slash.
fn normalize_path_for_compare(path: &str) -> String {
    let mut remainder = path;
    while let Some(stripped) = remainder.strip_prefix("./") {
        remainder = stripped;
    }
    while let Some(stripped) = remainder.strip_prefix(r"\\?\") {
        remainder = stripped;
    }
    remainder
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
27use crate::embed::{EmbeddingService, ModelType};
28use crate::file::Language;
29use crate::fts::FtsStore;
30use crate::index::{IndexManager, SharedStores};
31use crate::rerank::{rrf_fusion, rrf_fusion_with_exact, EXACT_MATCH_RRF_K};
32use crate::search::{adapt_rrf_k, boost_kind, detect_identifiers, detect_structural_intent};
33use crate::vectordb::VectorStore;
34
35pub use types::*;
37
/// MCP service that exposes code-search tools backed by an existing index database.
pub struct CodesearchService {
    /// Tool router, initialised from `Self::tool_router()` in the constructor.
    tool_router: ToolRouter<CodesearchService>,
    /// Location of the index database; `metadata.json` is looked up inside it.
    db_path: PathBuf,
    /// Root of the project the database belongs to (as reported by database discovery).
    project_path: PathBuf,
    /// Embedding model, parsed from `model_short_name` in `metadata.json` when present.
    model_type: ModelType,
    /// Embedding dimensionality passed to `VectorStore::new`; defaults to 384.
    dimensions: usize,
    /// Embedding service, created on first use by `get_embedding_service`.
    embedding_service: Mutex<Option<EmbeddingService>>,
    /// Optional pre-opened stores; when `None`, tools open a `VectorStore` per call.
    shared_stores: Option<Arc<SharedStores>>,
}
50
51impl std::fmt::Debug for CodesearchService {
52 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53 f.debug_struct("CodesearchService")
54 .field("db_path", &self.db_path)
55 .field("model_type", &self.model_type)
56 .field("dimensions", &self.dimensions)
57 .field("has_shared_stores", &self.shared_stores.is_some())
58 .finish()
59 }
60}
61
62#[tool_router]
65impl CodesearchService {
    /// Creates a service without shared stores.
    ///
    /// Delegates to [`Self::new_with_stores`] with `shared_stores = None`, so every tool call
    /// opens its own `VectorStore`.
    ///
    /// # Errors
    /// Propagates any error returned by [`Self::new_with_stores`].
    #[allow(dead_code)] pub fn new(requested_path: Option<PathBuf>) -> Result<Self> {
        Self::new_with_stores(requested_path, None)
    }
71
72 pub fn new_with_stores(
74 requested_path: Option<PathBuf>,
75 shared_stores: Option<Arc<SharedStores>>,
76 ) -> Result<Self> {
77 let db_info = find_best_database(requested_path.as_deref())?;
79
80 if db_info.is_none() {
81 return Err(anyhow::anyhow!(
82 "No database found in current directory, parent directories, or globally tracked repositories. \
83 Run 'codesearch index' first to index the codebase."
84 ));
85 }
86
87 let db_info = db_info.unwrap();
88 let db_path = db_info.db_path;
89 let project_path = db_info.project_path;
90
91 let metadata_path = db_path.join("metadata.json");
93 let (model_type, dimensions) = if metadata_path.exists() {
94 let content = std::fs::read_to_string(&metadata_path)?;
95 let json: serde_json::Value = serde_json::from_str(&content)?;
96 let model_name = json
97 .get("model_short_name")
98 .and_then(|v| v.as_str())
99 .unwrap_or("minilm-l6");
100 let dims = json
101 .get("dimensions")
102 .and_then(|v| v.as_u64())
103 .unwrap_or(384) as usize;
104 let mt = ModelType::parse(model_name).unwrap_or_default();
105 (mt, dims)
106 } else {
107 (ModelType::default(), 384)
108 };
109
110 Ok(Self {
111 tool_router: Self::tool_router(),
112 db_path,
113 project_path,
114 model_type,
115 dimensions,
116 embedding_service: Mutex::new(None),
117 shared_stores,
118 })
119 }
120
121 fn get_embedding_service(&self) -> Result<std::sync::MutexGuard<'_, Option<EmbeddingService>>> {
123 let mut guard = self.embedding_service.lock().unwrap();
124 if guard.is_none() {
125 let cache_dir = crate::constants::get_global_models_cache_dir()?;
126 *guard = Some(EmbeddingService::with_cache_dir(
127 self.model_type,
128 Some(&cache_dir),
129 )?);
130 }
131 Ok(guard)
132 }
133
134 fn ensure_database_exists(&self) -> Result<(), String> {
136 if !self.db_path.exists() {
137 return Err(format!(
138 "❌ No index database found at: {}\n\n\
139 ⚠️ IMPORTANT: This MCP server cannot index the codebase itself. Indexing takes 30-60 seconds and must be done manually.\n\n\
140 To fix this, run the following command in your terminal:\n\
141 $ cd {}\n\
142 $ codesearch index\n\n\
143 For more information about database locations, use the find_databases tool.",
144 self.db_path.display(),
145 self.project_path.display()
146 ));
147 }
148 Ok(())
149 }
150
151 #[tool(
152 description = "Search code semantically using natural language. Returns compact metadata by default (path, line numbers, kind, signature, score). Use the read tool with the returned line numbers to view actual code. Set compact=false only when you need full content inline. Use filter_path to narrow results to a specific directory."
153 )]
154 async fn semantic_search(
155 &self,
156 Parameters(request): Parameters<SemanticSearchRequest>,
157 ) -> Result<CallToolResult, McpError> {
158 let limit = request.limit.unwrap_or(10);
159 let compact = request.compact.unwrap_or(true);
160
161 tracing::debug!(
162 "MCP semantic_search: query='{}', limit={}, compact={}",
163 request.query,
164 limit,
165 compact
166 );
167
168 if let Err(e) = self.ensure_database_exists() {
170 return Ok(CallToolResult::success(vec![Content::text(e)]));
171 }
172
173 tracing::debug!("MCP: Getting embedding service...");
176 let query_embedding = {
177 let mut service_guard = match self.get_embedding_service() {
178 Ok(g) => g,
179 Err(e) => {
180 tracing::error!("MCP: Failed to get embedding service: {:?}", e);
181 return Ok(CallToolResult::success(vec![Content::text(format!(
182 "Error initializing embedding service: {}",
183 e
184 ))]));
185 }
186 };
187
188 let service = service_guard.as_mut().unwrap();
189 tracing::debug!("MCP: Embedding query...");
190 match service.embed_query(&request.query) {
191 Ok(e) => e,
192 Err(e) => {
193 tracing::error!("MCP: Failed to embed query: {:?}", e);
194 return Ok(CallToolResult::success(vec![Content::text(format!(
195 "Error embedding query: {}",
196 e
197 ))]));
198 }
199 }
200 };
202
203 tracing::debug!(
205 "MCP: Searching with {} dimensions...",
206 query_embedding.len()
207 );
208 let vector_results = if let Some(ref stores) = self.shared_stores {
209 let store = stores.vector_store.read().await;
211 match store.search(&query_embedding, limit * 3) {
212 Ok(r) => r,
213 Err(e) => {
214 tracing::error!("MCP: Search failed (shared store): {:?}", e);
215 return Ok(CallToolResult::success(vec![Content::text(format!(
216 "Error searching: {}",
217 e
218 ))]));
219 }
220 }
221 } else {
222 tracing::debug!("MCP: Opening vector store (standalone mode)...");
224 let store = match VectorStore::new(&self.db_path, self.dimensions) {
225 Ok(s) => s,
226 Err(e) => {
227 tracing::error!("MCP: Failed to open vector store: {:?}", e);
228 return Ok(CallToolResult::success(vec![Content::text(format!(
229 "Error opening database: {}. The database may be corrupted or not indexed yet.",
230 e
231 ))]));
232 }
233 };
234 match store.search(&query_embedding, limit * 3) {
235 Ok(r) => r,
236 Err(e) => {
237 tracing::error!("MCP: Search failed: {:?}", e);
238 return Ok(CallToolResult::success(vec![Content::text(format!(
239 "Error searching: {}",
240 e
241 ))]));
242 }
243 }
244 };
245
246 tracing::debug!("MCP: Found {} vector results", vector_results.len());
247
248 let identifiers = detect_identifiers(&request.query);
252 let structural_intent = detect_structural_intent(&request.query);
253 let (vector_k, fts_k) = adapt_rrf_k(&request.query);
254
255 tracing::debug!(
256 "MCP: Query analysis - identifiers: {:?}, structural_intent: {:?}, rrf_k: ({}, {})",
257 identifiers,
258 structural_intent,
259 vector_k,
260 fts_k
261 );
262
263 let mut results = match FtsStore::new(&self.db_path) {
265 Ok(fts_store) => {
266 let fts_results = fts_store
268 .search(&request.query, limit * 3, structural_intent.clone())
269 .unwrap_or_default();
270
271 let fused = if identifiers.is_empty() {
272 rrf_fusion(&vector_results, &fts_results, vector_k as f32)
274 } else {
275 let mut all_exact: Vec<crate::fts::FtsResult> = Vec::new();
277 for ident in &identifiers {
278 if let Ok(exact) =
279 fts_store.search_exact(ident, limit * 2, structural_intent.clone())
280 {
281 for r in exact {
282 if !all_exact.iter().any(|e| e.chunk_id == r.chunk_id) {
283 all_exact.push(r);
284 }
285 }
286 }
287 }
288
289 tracing::debug!(
290 "MCP: FTS found {} results, exact found {} results",
291 fts_results.len(),
292 all_exact.len()
293 );
294
295 rrf_fusion_with_exact(
296 &vector_results,
297 &fts_results,
298 &all_exact,
299 vector_k as f32,
300 fts_k as f32,
301 EXACT_MATCH_RRF_K,
302 )
303 };
304
305 let chunk_to_result: std::collections::HashMap<
307 u32,
308 &crate::vectordb::SearchResult,
309 > = vector_results.iter().map(|r| (r.id, r)).collect();
310
311 let mut mapped: Vec<crate::vectordb::SearchResult> = Vec::new();
312 for f in fused.into_iter().take(limit) {
313 if let Some(result) = chunk_to_result.get(&f.chunk_id) {
314 let mut r = (*result).clone();
315 r.score = f.rrf_score;
316 mapped.push(r);
317 }
318 }
319 mapped
320 }
321 Err(e) => {
322 tracing::warn!("MCP: FTS store unavailable, using vector-only: {:?}", e);
324 vector_results.into_iter().take(limit).collect()
325 }
326 };
327
328 if let Some((_, _, Some(primary_lang))) = crate::search::read_metadata(&self.db_path) {
330 for result in &mut results {
331 let file_lang = format!(
332 "{:?}",
333 Language::from_path(std::path::Path::new(&result.path))
334 );
335 if file_lang.to_lowercase() == primary_lang.to_lowercase() {
336 result.score *= 1.2;
337 }
338 }
339 results.sort_by(|a, b| {
340 b.score
341 .partial_cmp(&a.score)
342 .unwrap_or(std::cmp::Ordering::Equal)
343 });
344 }
345
346 if let Some(target_kind) = structural_intent {
348 boost_kind(&mut results, target_kind);
349 }
350
351 tracing::debug!("MCP: Final {} results after hybrid search", results.len());
352
353 if results.is_empty() {
354 return Ok(CallToolResult::success(vec![Content::text(
355 "No results found for the query. Try rephrasing your query or using broader terms.",
356 )]));
357 }
358
359 let items: Vec<SearchResultItem> = results
361 .into_iter()
362 .filter(|r| {
363 if let Some(ref fp) = request.filter_path {
365 let normalized_path = r.path.trim_start_matches("./");
366 let normalized_filter = fp.trim_start_matches("./").trim_end_matches('/');
367 normalized_path.starts_with(normalized_filter)
368 } else {
369 true
370 }
371 })
372 .map(|r| SearchResultItem {
373 path: r.path,
374 start_line: r.start_line,
375 end_line: r.end_line,
376 kind: r.kind,
377 score: r.score,
378 signature: r.signature,
379 content: if compact { None } else { Some(r.content) },
380 context_prev: if compact { None } else { r.context_prev },
381 context_next: if compact { None } else { r.context_next },
382 })
383 .collect();
384
385 let json = serde_json::to_string(&items).unwrap_or_else(|_| "[]".to_string());
386 Ok(CallToolResult::success(vec![Content::text(json)]))
387 }
388
389 #[tool(
390 description = "Get all indexed chunks from a specific file. Returns compact metadata by default (path, line numbers, kind, signature). Useful for understanding file structure before using the read tool for specific sections."
391 )]
392 async fn get_file_chunks(
393 &self,
394 Parameters(request): Parameters<GetFileChunksRequest>,
395 ) -> Result<CallToolResult, McpError> {
396 let compact = request.compact.unwrap_or(true);
397 if let Err(e) = self.ensure_database_exists() {
399 return Ok(CallToolResult::success(vec![Content::text(e)]));
400 }
401
402 let file_chunks = if let Some(ref stores) = self.shared_stores {
404 let store = stores.vector_store.read().await;
405
406 let mut file_chunks: Vec<SearchResultItem> = Vec::new();
409 let all = match store.all_chunks() {
410 Ok(c) => c,
411 Err(e) => {
412 return Ok(CallToolResult::success(vec![Content::text(format!(
413 "Error reading chunks: {}",
414 e
415 ))]));
416 }
417 };
418 for (_id, chunk) in all {
419 let chunk_norm = normalize_path_for_compare(&chunk.path);
421 let project_norm = normalize_path_for_compare(&self.project_path.to_string_lossy());
422 let req_norm = normalize_path_for_compare(&request.path);
423
424 let chunk_rel = if chunk_norm.starts_with(&project_norm) {
426 chunk_norm[project_norm.len()..]
427 .trim_start_matches('/')
428 .to_string()
429 } else {
430 chunk_norm.clone()
431 };
432
433 if chunk_rel == req_norm
435 || chunk_rel.ends_with(&format!("/{}", req_norm))
436 || req_norm.ends_with(&format!("/{}", chunk_rel))
437 || chunk.path == request.path
438 {
439 file_chunks.push(SearchResultItem {
440 path: chunk.path,
441 start_line: chunk.start_line,
442 end_line: chunk.end_line,
443 kind: chunk.kind,
444 score: 1.0,
445 signature: chunk.signature,
446 content: if compact { None } else { Some(chunk.content) },
447 context_prev: if compact { None } else { chunk.context_prev },
448 context_next: if compact { None } else { chunk.context_next },
449 });
450 }
451 }
452 file_chunks
453 } else {
454 let store = match VectorStore::new(&self.db_path, self.dimensions) {
456 Ok(s) => s,
457 Err(e) => {
458 return Ok(CallToolResult::success(vec![Content::text(format!(
459 "Error opening database: {}",
460 e
461 ))]));
462 }
463 };
464
465 let mut file_chunks: Vec<SearchResultItem> = Vec::new();
468 let all = match store.all_chunks() {
469 Ok(c) => c,
470 Err(e) => {
471 return Ok(CallToolResult::success(vec![Content::text(format!(
472 "Error reading chunks: {}",
473 e
474 ))]));
475 }
476 };
477 for (_id, chunk) in all {
478 let chunk_norm = normalize_path_for_compare(&chunk.path);
480 let project_norm = normalize_path_for_compare(&self.project_path.to_string_lossy());
481 let req_norm = normalize_path_for_compare(&request.path);
482
483 let chunk_rel = if chunk_norm.starts_with(&project_norm) {
485 chunk_norm[project_norm.len()..]
486 .trim_start_matches('/')
487 .to_string()
488 } else {
489 chunk_norm.clone()
490 };
491
492 if chunk_rel == req_norm
494 || chunk_rel.ends_with(&format!("/{}", req_norm))
495 || req_norm.ends_with(&format!("/{}", chunk_rel))
496 || chunk.path == request.path
497 {
498 file_chunks.push(SearchResultItem {
499 path: chunk.path,
500 start_line: chunk.start_line,
501 end_line: chunk.end_line,
502 kind: chunk.kind,
503 score: 1.0,
504 signature: chunk.signature,
505 content: if compact { None } else { Some(chunk.content) },
506 context_prev: if compact { None } else { chunk.context_prev },
507 context_next: if compact { None } else { chunk.context_next },
508 });
509 }
510 }
511 file_chunks
512 };
513
514 let mut file_chunks = file_chunks;
516 file_chunks.sort_by_key(|c| c.start_line);
517
518 if file_chunks.is_empty() {
519 return Ok(CallToolResult::success(vec![Content::text(format!(
520 "No chunks found for file: {}. The file may not be indexed or the path may be incorrect.",
521 request.path
522 ))]));
523 }
524
525 let json = serde_json::to_string(&file_chunks).unwrap_or_else(|_| "[]".to_string());
526 Ok(CallToolResult::success(vec![Content::text(json)]))
527 }
528
529 #[tool(
530 description = "Find all references/usages of a symbol (function, class, method, variable) across the codebase. USE THIS INSTEAD OF GREP when you need to find where a symbol is used — for refactoring, impact analysis, or understanding call sites. Returns compact list of file paths, line numbers, and containing function signatures."
531 )]
532 async fn find_references(
533 &self,
534 Parameters(request): Parameters<FindReferencesRequest>,
535 ) -> Result<CallToolResult, McpError> {
536 let limit = request.limit.unwrap_or(20);
537
538 tracing::debug!(
539 "MCP find_references: symbol='{}', limit={}",
540 request.symbol,
541 limit
542 );
543
544 if let Err(e) = self.ensure_database_exists() {
546 return Ok(CallToolResult::success(vec![Content::text(e)]));
547 }
548
549 let fts_store = match FtsStore::new(&self.db_path) {
551 Ok(s) => s,
552 Err(e) => {
553 return Ok(CallToolResult::success(vec![Content::text(format!(
554 "Error opening FTS store: {}. Try re-indexing with 'codesearch index --force'.",
555 e
556 ))]));
557 }
558 };
559
560 let fts_results = match fts_store.search(&request.symbol, limit * 2, None) {
562 Ok(r) => r,
563 Err(e) => {
564 return Ok(CallToolResult::success(vec![Content::text(format!(
565 "Error searching for references: {}",
566 e
567 ))]));
568 }
569 };
570
571 if fts_results.is_empty() {
572 return Ok(CallToolResult::success(vec![Content::text(format!(
573 "No references found for '{}'. The symbol may not be indexed or try a different name.",
574 request.symbol
575 ))]));
576 }
577
578 let items: Vec<ReferenceItem> = if let Some(ref stores) = self.shared_stores {
580 let store = stores.vector_store.read().await;
581 fts_results
582 .iter()
583 .filter_map(|fts_result| {
584 if let Ok(Some(chunk)) = store.get_chunk(fts_result.chunk_id) {
585 Some(ReferenceItem {
586 path: chunk.path,
587 line: chunk.start_line,
588 kind: chunk.kind,
589 signature: chunk.signature,
590 score: fts_result.score,
591 })
592 } else {
593 None
594 }
595 })
596 .take(limit)
597 .collect()
598 } else {
599 let store = match VectorStore::new(&self.db_path, self.dimensions) {
601 Ok(s) => s,
602 Err(e) => {
603 return Ok(CallToolResult::success(vec![Content::text(format!(
604 "Error opening database: {}",
605 e
606 ))]));
607 }
608 };
609 fts_results
610 .iter()
611 .filter_map(|fts_result| {
612 if let Ok(Some(chunk)) = store.get_chunk(fts_result.chunk_id) {
613 Some(ReferenceItem {
614 path: chunk.path,
615 line: chunk.start_line,
616 kind: chunk.kind,
617 signature: chunk.signature,
618 score: fts_result.score,
619 })
620 } else {
621 None
622 }
623 })
624 .take(limit)
625 .collect()
626 };
627
628 let json = serde_json::to_string(&items).unwrap_or_else(|_| "[]".to_string());
629 Ok(CallToolResult::success(vec![Content::text(json)]))
630 }
631
632 #[tool(
633 description = "Get the status of the semantic search index including model info and statistics. Check this before searching to verify the index is ready."
634 )]
635 async fn index_status(&self) -> Result<CallToolResult, McpError> {
636 let indexed = self.db_path.exists();
637
638 if !indexed {
639 let response = IndexStatusResponse {
640 indexed: false,
641 total_chunks: 0,
642 total_files: 0,
643 model: "none".to_string(),
644 dimensions: 0,
645 max_chunk_id: 0,
646 db_path: self.db_path.display().to_string(),
647 project_path: self.project_path.display().to_string(),
648 error_message: Some(
649 "No index found. Run 'codesearch index' first to create the index.".to_string(),
650 ),
651 };
652 let json = serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
653 return Ok(CallToolResult::success(vec![Content::text(json)]));
654 }
655
656 let stats = if let Some(ref stores) = self.shared_stores {
658 let store = stores.vector_store.read().await;
659 match store.stats() {
660 Ok(s) => s,
661 Err(e) => {
662 let response = IndexStatusResponse {
663 indexed: false,
664 total_chunks: 0,
665 total_files: 0,
666 model: self.model_type.short_name().to_string(),
667 dimensions: 0,
668 max_chunk_id: 0,
669 db_path: self.db_path.display().to_string(),
670 project_path: self.project_path.display().to_string(),
671 error_message: Some(format!("Error getting stats: {}", e)),
672 };
673 let json =
674 serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
675 return Ok(CallToolResult::success(vec![Content::text(json)]));
676 }
677 }
678 } else {
679 let store = match VectorStore::new(&self.db_path, self.dimensions) {
681 Ok(s) => s,
682 Err(e) => {
683 let response = IndexStatusResponse {
684 indexed: false,
685 total_chunks: 0,
686 total_files: 0,
687 model: self.model_type.short_name().to_string(),
688 dimensions: 0,
689 max_chunk_id: 0,
690 db_path: self.db_path.display().to_string(),
691 project_path: self.project_path.display().to_string(),
692 error_message: Some(format!("Error getting stats: {}", e)),
693 };
694 let json =
695 serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
696 return Ok(CallToolResult::success(vec![Content::text(json)]));
697 }
698 };
699
700 match store.stats() {
701 Ok(s) => s,
702 Err(e) => {
703 let response = IndexStatusResponse {
704 indexed: false,
705 total_chunks: 0,
706 total_files: 0,
707 model: self.model_type.short_name().to_string(),
708 dimensions: 0,
709 max_chunk_id: 0,
710 db_path: self.db_path.display().to_string(),
711 project_path: self.project_path.display().to_string(),
712 error_message: Some(format!("Error getting stats: {}", e)),
713 };
714 let json =
715 serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
716 return Ok(CallToolResult::success(vec![Content::text(json)]));
717 }
718 }
719 };
720
721 let response = IndexStatusResponse {
722 indexed: stats.indexed,
723 total_chunks: stats.total_chunks,
724 total_files: stats.total_files,
725 model: self.model_type.short_name().to_string(),
726 dimensions: stats.dimensions,
727 max_chunk_id: stats.max_chunk_id,
728 db_path: self.db_path.display().to_string(),
729 project_path: self.project_path.display().to_string(),
730 error_message: None,
731 };
732
733 let json = serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
734 Ok(CallToolResult::success(vec![Content::text(json)]))
735 }
736
737 #[tool(
738 description = "Find all available codesearch databases in the current directory, parent directories, and globally tracked repositories. Use this to discover which databases are available for searching."
739 )]
740 async fn find_databases(&self) -> Result<CallToolResult, McpError> {
741 let current_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
742 let dbs = find_databases().unwrap_or_default();
743
744 let mut response_dbs = Vec::new();
745
746 for db_info in &dbs {
747 let (total_chunks, total_files, model) = if db_info.db_path.exists() {
749 let metadata_path = db_info.db_path.join("metadata.json");
751 let model_name = if metadata_path.exists() {
752 if let Ok(content) = std::fs::read_to_string(&metadata_path) {
753 if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
754 json.get("model_short_name")
755 .and_then(|v| v.as_str())
756 .unwrap_or("unknown")
757 .to_string()
758 } else {
759 "unknown".to_string()
760 }
761 } else {
762 "unknown".to_string()
763 }
764 } else {
765 "unknown".to_string()
766 };
767
768 let dims = match model_name.as_str() {
770 "minilm-l6" | "minilm-l6-q" | "minilm-l12" | "minilm-l12-q" | "bge-small"
771 | "bge-small-q" | "e5-multilingual" => 384,
772 "bge-base" | "jina-code" | "nomic-v1.5" => 768,
773 "bge-large" | "mxbai-large" => 1024,
774 _ => 384, };
776
777 if let Ok(store) = VectorStore::new(&db_info.db_path, dims) {
779 if let Ok(stats) = store.stats() {
780 (stats.total_chunks, stats.total_files, model_name)
781 } else {
782 (0, 0, model_name)
783 }
784 } else {
785 (0, 0, model_name)
786 }
787 } else {
788 (0, 0, "not found".to_string())
789 };
790
791 response_dbs.push(DatabaseInfoResponse {
792 database_path: db_info.db_path.display().to_string(),
793 project_path: db_info.project_path.display().to_string(),
794 is_current_directory: db_info.is_current,
795 depth_from_current: db_info.depth,
796 total_chunks,
797 total_files,
798 model,
799 });
800 }
801
802 let message = if dbs.is_empty() {
804 "❌ No databases found. Run 'codesearch index' to create an index.".to_string()
805 } else if dbs.iter().any(|d| d.is_current) {
806 format!(
807 "✅ Found {} database(s). Current directory has an index.",
808 dbs.len()
809 )
810 } else {
811 format!("⚠️ Found {} database(s) in parent/global directories, but not in current directory.", dbs.len())
812 };
813
814 let response = FindDatabasesResponse {
815 databases: response_dbs,
816 message,
817 current_directory: current_dir.display().to_string(),
818 };
819
820 let json = serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string());
821 Ok(CallToolResult::success(vec![Content::text(json)]))
822 }
823}
824
825#[tool_handler]
828impl ServerHandler for CodesearchService {
829 fn get_info(&self) -> ServerInfo {
830 let current_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
831 let db_exists = self.db_path.exists();
832
833 ServerInfo {
834 capabilities: ServerCapabilities::builder().enable_tools().build(),
835 server_info: rmcp::model::Implementation {
836 name: "codesearch".to_string(),
837 version: env!("CARGO_PKG_VERSION").to_string(),
838 title: None,
839 icons: None,
840 website_url: None,
841 },
842 instructions: Some(format!(
843 r#"codesearch - Semantic Code Search MCP Server
844
845codesearch provides fast, local semantic code search using natural language queries.
846Search your codebase by meaning, not just by keywords.
847
848⚠️ IMPORTANT: This MCP server CANNOT index codebases. Indexing must be done manually.
849Indexing takes 30-60 seconds and should be done via the CLI: `codesearch index`
850
851AVAILABLE TOOLS:
852
8531. find_databases()
854 Find all available databases in current directory, parent directories, and globally.
855 Use this FIRST to discover which databases are available.
856 Returns: List of databases with paths, stats, and model info.
857
8582. index_status()
859 Check if the current index is ready for searching.
860 Use this AFTER find_databases() to verify the database is accessible.
861 Returns: Index status, stats, model info, and any error messages.
862
8633. semantic_search(query, limit=10, compact=true, filter_path=null)
864 Search the codebase using natural language queries.
865 By default returns COMPACT results (path, line numbers, kind, signature, score only).
866 Set compact=false to include full code content (use sparingly - high token cost).
867 Use filter_path to narrow results to a specific directory (e.g., "src/api/").
868 Query examples:
869 - "where do we handle user authentication?"
870 - "how is error logging implemented?"
871 - "functions that process payment data"
872 Returns: Array of matches with metadata. Use read tool to fetch actual code.
873
8744. find_references(symbol, limit=50)
875 Find all usages/call sites of a function, method, class, or type across the codebase.
876 ⚠️ USE THIS instead of grep when you need to find where a symbol is used.
877 Essential for refactoring — shows all locations that need to change.
878 Examples:
879 - find_references("authenticate") - Find all calls to authenticate()
880 - find_references("UserService") - Find all usages of UserService
881 - find_references("handleRequest") - Find all call sites
882 Returns: Compact list of file paths, line numbers, kind, and score.
883
8845. get_file_chunks(path, compact=true)
885 Get all indexed chunks from a specific file.
886 Useful for understanding the structure of a file (functions, classes, methods).
887 By default returns COMPACT metadata only. Set compact=false for full content.
888 Returns: Chunks with metadata. Use read tool to fetch actual code.
889
890TOKEN-EFFICIENT WORKFLOW (IMPORTANT):
891
892All tools return compact metadata by default to minimize token usage.
893Use the read tool to fetch actual code content only for the specific
894lines you need. NEVER use grep for finding symbol usages — use
895find_references() instead.
896
897RECOMMENDED WORKFLOW:
898
899Step 1: Discover
900 find_databases() → index_status()
901
902Step 2: Search (compact — returns metadata only)
903 semantic_search("authentication handler")
904
905Step 3: Find related code (compact — returns locations only)
906 find_references("authenticate")
907
908Step 4: Read only what you need (targeted)
909 Use read tool with exact file path + line numbers from steps 2-3
910
911REFACTORING WORKFLOW:
912
9131. semantic_search("the function to refactor") → find the definition
9142. find_references("functionName") → find ALL call sites
9153. Read each call site with read tool → understand usage patterns
9164. Make changes to definition + all call sites
917
918⚠️ NEVER use grep to find symbol references. Always use find_references().
919 grep is only for exact string matching in non-indexed files.
920
921USAGE PATTERNS:
922
923Understanding a New Codebase:
924 1. find_databases() → index_status()
925 2. semantic_search("main application entry point")
926 3. semantic_search("error handling strategy")
927 4. get_file_chunks("src/main.rs") → see file structure
928
929Finding Implementation Patterns:
930 - semantic_search("how are API endpoints defined?")
931 - semantic_search("database model definitions")
932 - get_file_chunks("src/models/user.rs") → see structure, read for details
933
934Debugging and Analysis:
935 - semantic_search("error handling for database operations")
936 - find_references("handleError") → find all error handling sites
937
938BEST PRACTICES:
939
940✓ Always call find_databases() first to discover available indexes
941✓ Check index_status() before searching to verify the database is ready
942✓ Use natural language queries describing concepts, not exact terms
943✓ Use find_references() for refactoring — NOT grep
944✓ Use filter_path to narrow searches to specific directories
945✓ Let compact mode save tokens — read specific lines only when needed
946✓ Start with broader queries, then narrow down
947
948✗ Never attempt to index from this MCP server - use CLI instead
949✗ Never use grep to find symbol usages — use find_references() instead
950✗ Avoid short, vague queries like "auth" or "db"
951✗ Don't use compact=false unless you specifically need full code content
952✗ Don't search in subfolders expecting a separate index - indexes are project-wide
953
954DATABASE LOCATIONS:
955
956Priority order for database selection:
9571. Current directory (.codesearch.db/)
9582. Parent directories (up to 5 levels)
9593. Globally tracked repositories (~/.codesearch/repos.json)
960
961Current project: {project}
962Current database: {db}
963Database exists: {exists}
964Current directory: {cwd}
965
966ERROR HANDLING:
967
968If you get "No index found" errors:
9691. Call find_databases() to see what's available
9702. Check if you're in the right directory
9713. Verify the user has run 'codesearch index'
972
973If search returns poor results:
9741. The index may be stale - ask user to re-run 'codesearch index'
9752. Try different query phrasing
9763. Check index_status() for any errors
977
978SETUP:
979
980To create an index, the USER must run (not the agent):
981 $ cd /path/to/project
982 $ codesearch index
983
984Indexing takes 30-60 seconds and cannot be done from the MCP server.
985
986For detailed documentation, visit: https://github.com/flupkede/codesearch
987
988Model: {model}
989Dimensions: {dims}
990"#,
991 project = self.project_path.display(),
992 db = self.db_path.display(),
993 exists = if db_exists { "✅ Yes" } else { "❌ No" },
994 cwd = current_dir.display(),
995 model = self.model_type.short_name(),
996 dims = self.dimensions
997 )),
998 ..Default::default()
999 }
1000 }
1001}
1002
1003pub async fn run_mcp_server(path: Option<PathBuf>, cancel_token: CancellationToken) -> Result<()> {
1017 use rmcp::{transport::stdio, ServiceExt};
1018
1019 tracing::info!("🚀 Starting codesearch MCP server");
1020
1021 let db_info = find_best_database(path.as_deref())?;
1023
1024 if db_info.is_none() {
1025 return Err(anyhow::anyhow!(
1026 "No database found in current directory, parent directories, or globally tracked repositories. \
1027 Run 'codesearch index' first to index the codebase."
1028 ));
1029 }
1030
1031 let db_info = db_info.unwrap();
1032 let project_path = db_info.project_path.clone();
1033 let db_path = db_info.db_path.clone();
1034
1035 tracing::info!("📂 Project: {}", project_path.display());
1036 tracing::info!("💾 Database: {}", db_path.display());
1037
1038 let metadata_path = db_path.join("metadata.json");
1040 let dimensions = if metadata_path.exists() {
1041 let content = std::fs::read_to_string(&metadata_path)?;
1042 let json: serde_json::Value = serde_json::from_str(&content)?;
1043 json.get("dimensions")
1044 .and_then(|v| v.as_u64())
1045 .unwrap_or(384) as usize
1046 } else {
1047 384
1048 };
1049
1050 tracing::info!("📦 Creating shared stores (readonly)...");
1056 let shared_stores = SharedStores::new_readonly(&db_path, dimensions)?;
1057 let shared_stores = Arc::new(shared_stores);
1058 let is_readonly = true;
1059
1060 let service = CodesearchService::new_with_stores(
1062 Some(project_path.clone()),
1063 Some(shared_stores.clone()),
1064 )?;
1065
1066 tracing::info!("🧠 Model: {}", service.model_type.name());
1067
1068 tracing::info!(
1070 "🚀 Starting MCP server{}...",
1071 if is_readonly { " (readonly)" } else { "" }
1072 );
1073 let server = service.serve(stdio()).await?;
1074
1075 tracing::info!("MCP server ready. Waiting for requests...");
1076
1077 if !is_readonly {
1079 tracing::info!("🔍 Initializing index manager...");
1081 let index_manager =
1082 IndexManager::new_without_refresh(&project_path, shared_stores.clone()).await?;
1083
1084 let project_path_clone = project_path.clone();
1087 let db_path_clone = db_path.clone();
1088 let shared_stores_clone = shared_stores.clone();
1089 let index_manager_arc = Arc::new(index_manager);
1090 let bg_cancel_token = cancel_token.clone();
1091 tokio::spawn(async move {
1092 if let Err(e) = index_manager_arc.start_watching().await {
1095 tracing::warn!("⚠️ Could not pre-start file watcher: {}", e);
1096 }
1097
1098 tracing::info!("🔄 Starting background incremental refresh...");
1100 match IndexManager::perform_incremental_refresh_with_stores(
1101 &project_path_clone,
1102 &db_path_clone,
1103 &shared_stores_clone,
1104 )
1105 .await
1106 {
1107 Ok(_) => {
1108 tracing::info!("✅ Background incremental refresh completed");
1109
1110 if bg_cancel_token.is_cancelled() {
1112 tracing::info!("🛑 Shutdown requested, skipping file watcher startup");
1113 return;
1114 }
1115
1116 tracing::info!("👀 Starting file watcher...");
1118 if let Err(e) = index_manager_arc.start_file_watcher(bg_cancel_token).await {
1119 tracing::error!("❌ Failed to start file watcher: {}", e);
1120 } else {
1121 tracing::info!(
1122 "✅ File watcher active - index will auto-update on file changes"
1123 );
1124 }
1125 }
1126 Err(e) => {
1127 tracing::error!("❌ Background incremental refresh failed: {}", e);
1128 }
1129 }
1130 });
1131
1132 let db_path_for_cleanup = db_path.clone();
1134 let cleanup_cancel_token = cancel_token.clone();
1135 tokio::spawn(async move {
1136 use crate::logger::{cleanup_old_logs, LogRotationConfig};
1137
1138 let rotation_config = LogRotationConfig::from_env();
1140 tracing::info!("🧹 Running initial log cleanup...");
1141 if let Err(e) = cleanup_old_logs(&db_path_for_cleanup, &rotation_config) {
1142 tracing::warn!("Initial log cleanup failed: {}", e);
1143 }
1144
1145 crate::logger::start_cleanup_task(
1147 db_path_for_cleanup.clone(),
1148 rotation_config,
1149 cleanup_cancel_token,
1150 );
1151 });
1152 } else {
1153 tracing::info!("📖 Readonly mode: skipping background refresh and file watcher");
1154 }
1155
1156 tokio::select! {
1158 result = server.waiting() => {
1159 tracing::info!("MCP server transport closed");
1160 result?;
1161 }
1162 _ = cancel_token.cancelled() => {
1163 tracing::info!("🛑 Shutdown signal received, stopping MCP server...");
1164 }
1165 }
1166
1167 tracing::info!("✅ MCP server shut down cleanly");
1168 Ok(())
1169}