1use anyhow::Result;
2use rmcp::handler::server::router::tool::ToolRouter;
3use rmcp::handler::server::tool::{ToolCallContext, ToolRoute};
4use rmcp::model::{
5 CallToolRequestParam, CallToolResult, Content, Implementation, InitializeResult,
6 ListToolsResult, Meta, PaginatedRequestParam, ProgressNotificationParam, ProtocolVersion, Tool,
7 ToolsCapability,
8};
9use rmcp::service::RequestContext;
10use rmcp::transport;
11use rmcp::{ErrorData, Peer, RoleServer};
12use rmcp::{ServerHandler, ServiceExt};
13use schemars::JsonSchema;
14use serde::{Deserialize, Serialize};
15use serde_json::{Value, json};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::sync::atomic::{AtomicUsize, Ordering};
19use std::time::Instant;
20use tracing::info;
21use walkdir::WalkDir;
22
23use crate::mcp::context::McpContext;
24use crate::mcp::session::{PaginationConfig, SearchPage};
25use crate::path_utils::{build_include_patterns, expand_glob_patterns_with_base};
26use ck_core::{
27 IncludePattern, SearchMode, SearchOptions, get_default_ckignore_content,
28 get_default_exclude_patterns,
29};
30
// Fallback result count for MCP searches.
// NOTE(review): presumably applied when a request omits `top_k`; the use site
// is outside this part of the file — confirm.
const DEFAULT_MCP_TOP_K: usize = 10;
34
35fn filter_valid_results(mut results: Vec<ck_core::SearchResult>) -> Vec<ck_core::SearchResult> {
37 results.retain(|result| result.file.exists());
38 results
39}
40
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// One include entry may carry several `;`-separated segments; directory
    /// names, globs and explicit files must all be resolved.
    #[test]
    fn include_patterns_support_semicolon_lists_and_globs() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();

        // Fixture layout: two markdown files under docs/, one Rust file under
        // src/, and a TypeScript file at the root.
        for dir in ["docs/sub", "src"] {
            fs::create_dir_all(root.join(dir)).unwrap();
        }
        let fixtures = [
            ("docs/readme.md", "# docs"),
            ("docs/sub/note.md", "note"),
            ("src/lib.rs", "pub fn lib() {}"),
            ("file.ts", "export {}"),
        ];
        for (relative, body) in fixtures {
            fs::write(root.join(relative), body).unwrap();
        }

        let requested = Some(vec!["docs/;*.rs;file.ts".to_string()]);
        let resolved = resolve_include_patterns(root, requested, &[]).expect("resolve patterns");

        // True when some resolved pattern has the given kind and path suffix.
        let contains = |expect_dir: bool, suffix: &str| {
            resolved
                .iter()
                .any(|entry| entry.is_dir == expect_dir && entry.path.ends_with(suffix))
        };

        assert!(contains(true, "docs"), "docs directory should be included");
        assert!(contains(false, "lib.rs"), "lib.rs should be included via glob");
        assert!(
            contains(false, "file.ts"),
            "file.ts should be included explicitly"
        );
    }
}
78
79fn resolve_exclude_patterns(
80 explicit: Option<Vec<String>>,
81 use_default_excludes: Option<bool>,
82) -> Vec<String> {
83 ck_core::build_exclude_patterns(
86 &explicit.unwrap_or_default(),
87 use_default_excludes.unwrap_or(true),
88 )
89}
90
91fn resolve_include_patterns(
92 base_path: &Path,
93 include_patterns: Option<Vec<String>>,
94 exclude_patterns: &[String],
95) -> Result<Vec<IncludePattern>, ErrorData> {
96 let Some(patterns) = include_patterns else {
97 return Ok(Vec::new());
98 };
99
100 let mut prepared_patterns: Vec<PathBuf> = Vec::new();
101
102 for pattern in patterns {
103 for segment in pattern.split(';') {
104 let trimmed = segment.trim();
105 if trimmed.is_empty() {
106 continue;
107 }
108
109 prepared_patterns.push(PathBuf::from(trimmed));
110 }
111 }
112
113 let expanded = expand_glob_patterns_with_base(base_path, &prepared_patterns, exclude_patterns)
114 .map_err(|e| {
115 ErrorData::invalid_params(format!("Failed to expand include patterns: {e}"), None)
116 })?;
117
118 Ok(build_include_patterns(&expanded))
119}
120
121trait PaginationParams {
123 fn get_page_size(&self) -> Option<usize>;
124 fn get_include_snippet(&self) -> Option<bool>;
125 fn get_snippet_length(&self) -> Option<usize>;
126 fn get_context_lines(&self) -> Option<usize>;
127 fn get_search_mode(&self) -> String;
128 fn get_query(&self) -> String;
129 fn get_search_params(&self) -> serde_json::Value;
130}
131
// Arguments accepted by the `semantic_search` tool.
//
// Plain `//` comments are used on purpose: doc comments on a `JsonSchema`
// type would add descriptions to the generated tool schema.
#[derive(Serialize, Deserialize, JsonSchema, Default)]
pub struct SemanticSearchRequest {
    // Query text.
    pub query: String,
    // File or directory to search; resolved through the server context by
    // `handle_semantic_search`.
    pub path: String,
    // Result limit. NOTE(review): how a missing value is defaulted is not
    // visible here — confirm.
    pub top_k: Option<usize>,
    // Score threshold; reported as 0.6 in the echoed search parameters when
    // omitted.
    pub threshold: Option<f32>,
    // Paths/globs to include; each entry may hold several `;`-separated
    // segments. The schema is generated as for a plain `Vec<String>`.
    #[schemars(with = "Vec<String>")]
    pub include_patterns: Option<Vec<String>>,
    // Extra exclude patterns. The schema is generated as for a plain
    // `Vec<String>`.
    #[schemars(with = "Vec<String>")]
    pub exclude_patterns: Option<Vec<String>>,
    // Treated as `true` when omitted.
    pub respect_gitignore: Option<bool>,
    // Treated as `true` when omitted.
    pub use_default_excludes: Option<bool>,
    // Reported as `false` in the echoed search parameters when omitted.
    pub rerank: Option<bool>,
    pub rerank_model: Option<String>,
    // The three matching flags below are reported as `false` when omitted.
    pub case_insensitive: Option<bool>,
    pub whole_word: Option<bool>,
    pub fixed_string: Option<bool>,
    // Both fall back to `context_lines` in `handle_semantic_search`.
    pub before_context_lines: Option<usize>,
    pub after_context_lines: Option<usize>,
    // When set, `handle_semantic_search` returns the page for this cursor
    // instead of starting a new search.
    pub cursor: Option<String>,
    // Pagination settings; `extract_pagination_config` applies the defaults
    // 50 / true / 500 / 0 (in field order) when these are omitted.
    pub page_size: Option<usize>,
    pub include_snippet: Option<bool>,
    pub snippet_length: Option<usize>,
    pub context_lines: Option<usize>,
}
158
// Arguments accepted by the `regex_search` tool.
//
// Plain `//` comments are used on purpose: doc comments on a `JsonSchema`
// type would add descriptions to the generated tool schema.
#[derive(Serialize, Deserialize, JsonSchema, Default)]
pub struct RegexSearchRequest {
    // Pattern text; returned as the "query" by `PaginationParams::get_query`.
    pub pattern: String,
    // File or directory to search.
    pub path: String,
    // Reported as `false` in the echoed search parameters when omitted.
    pub ignore_case: Option<bool>,
    // Number of context lines; treated as 0 when omitted.
    pub context: Option<usize>,
    // The schema for both pattern lists is generated as for a plain
    // `Vec<String>`.
    #[schemars(with = "Vec<String>")]
    pub include_patterns: Option<Vec<String>>,
    #[schemars(with = "Vec<String>")]
    pub exclude_patterns: Option<Vec<String>>,
    // Both reported as `true` in the echoed search parameters when omitted.
    pub respect_gitignore: Option<bool>,
    pub use_default_excludes: Option<bool>,
    // Both reported as `false` in the echoed search parameters when omitted.
    pub whole_word: Option<bool>,
    pub fixed_string: Option<bool>,
    // Pagination cursor. NOTE(review): presumably resumes an earlier search,
    // as in the semantic handler; the regex handler is not visible — confirm.
    pub cursor: Option<String>,
    // Pagination settings; `extract_pagination_config` applies the defaults
    // 50 / true / 500 (in field order) when these are omitted.
    pub page_size: Option<usize>,
    pub include_snippet: Option<bool>,
    pub snippet_length: Option<usize>,
}
179
// Arguments accepted by the `hybrid_search` tool.
//
// Plain `//` comments are used on purpose: doc comments on a `JsonSchema`
// type would add descriptions to the generated tool schema.
#[derive(Serialize, Deserialize, JsonSchema, Default)]
pub struct HybridSearchRequest {
    // Query text.
    pub query: String,
    // File or directory to search.
    pub path: String,
    // Result limit. NOTE(review): defaulting is not visible here — confirm.
    pub top_k: Option<usize>,
    // Score threshold; reported as 0.02 in the echoed search parameters when
    // omitted.
    pub threshold: Option<f32>,
    // The schema for both pattern lists is generated as for a plain
    // `Vec<String>`.
    #[schemars(with = "Vec<String>")]
    pub include_patterns: Option<Vec<String>>,
    #[schemars(with = "Vec<String>")]
    pub exclude_patterns: Option<Vec<String>>,
    // Both reported as `true` in the echoed search parameters when omitted.
    pub respect_gitignore: Option<bool>,
    pub use_default_excludes: Option<bool>,
    // Reported as `false` in the echoed search parameters when omitted.
    pub rerank: Option<bool>,
    pub rerank_model: Option<String>,
    // The three matching flags below are reported as `false` when omitted.
    pub case_insensitive: Option<bool>,
    pub whole_word: Option<bool>,
    pub fixed_string: Option<bool>,
    // NOTE(review): presumably fall back to `context_lines` as in the
    // semantic handler; the hybrid handler is not visible — confirm.
    pub before_context_lines: Option<usize>,
    pub after_context_lines: Option<usize>,
    // Pagination cursor. NOTE(review): presumably resumes an earlier search —
    // confirm against the hybrid handler.
    pub cursor: Option<String>,
    // Pagination settings; `extract_pagination_config` applies the defaults
    // 50 / true / 500 / 0 (in field order) when these are omitted.
    pub page_size: Option<usize>,
    pub include_snippet: Option<bool>,
    pub snippet_length: Option<usize>,
    pub context_lines: Option<usize>,
}
206
// Arguments accepted by the `lexical_search` tool.
//
// Plain `//` comments are used on purpose: doc comments on a `JsonSchema`
// type would add descriptions to the generated tool schema.
#[derive(Serialize, Deserialize, JsonSchema, Default)]
pub struct LexicalSearchRequest {
    // Query text.
    pub query: String,
    // File or directory to search.
    pub path: String,
    // Result limit. NOTE(review): defaulting is not visible here — confirm.
    pub top_k: Option<usize>,
    // Score threshold; echoed as-is (no default) in the search parameters.
    pub threshold: Option<f32>,
    // The schema for both pattern lists is generated as for a plain
    // `Vec<String>`.
    #[schemars(with = "Vec<String>")]
    pub include_patterns: Option<Vec<String>>,
    #[schemars(with = "Vec<String>")]
    pub exclude_patterns: Option<Vec<String>>,
    // Both reported as `true` in the echoed search parameters when omitted.
    pub respect_gitignore: Option<bool>,
    pub use_default_excludes: Option<bool>,
    // The three matching flags below are reported as `false` when omitted.
    pub case_insensitive: Option<bool>,
    pub whole_word: Option<bool>,
    pub fixed_string: Option<bool>,
    // NOTE(review): presumably fall back to `context_lines` as in the
    // semantic handler; the lexical handler is not visible — confirm.
    pub before_context_lines: Option<usize>,
    pub after_context_lines: Option<usize>,
    // Pagination cursor. NOTE(review): presumably resumes an earlier search —
    // confirm against the lexical handler.
    pub cursor: Option<String>,
    // Pagination settings; `extract_pagination_config` applies the defaults
    // 50 / true / 500 / 0 (in field order) when these are omitted.
    pub page_size: Option<usize>,
    pub include_snippet: Option<bool>,
    pub snippet_length: Option<usize>,
    pub context_lines: Option<usize>,
}
231
// Arguments accepted by the `index_status` tool.
#[derive(Serialize, Deserialize, JsonSchema)]
pub struct IndexStatusRequest {
    // Directory whose index status is requested.
    pub path: String,
}
236
// Arguments accepted by the `reindex` tool.
#[derive(Serialize, Deserialize, JsonSchema)]
pub struct ReindexRequest {
    // Directory to reindex.
    pub path: String,
    // Optional force flag. NOTE(review): its default and exact effect are
    // decided by the reindex handler, which is not visible here — confirm.
    pub force: Option<bool>,
}
242
243impl PaginationParams for SemanticSearchRequest {
244 fn get_page_size(&self) -> Option<usize> {
245 self.page_size
246 }
247 fn get_include_snippet(&self) -> Option<bool> {
248 self.include_snippet
249 }
250 fn get_snippet_length(&self) -> Option<usize> {
251 self.snippet_length
252 }
253 fn get_context_lines(&self) -> Option<usize> {
254 self.context_lines
255 }
256 fn get_search_mode(&self) -> String {
257 "semantic".to_string()
258 }
259 fn get_query(&self) -> String {
260 self.query.clone()
261 }
262 fn get_search_params(&self) -> serde_json::Value {
263 json!({
264 "top_k": self.top_k,
265 "threshold": self.threshold.unwrap_or(0.6),
266 "rerank": self.rerank.unwrap_or(false),
267 "rerank_model": self.rerank_model,
268 "case_insensitive": self.case_insensitive.unwrap_or(false),
269 "whole_word": self.whole_word.unwrap_or(false),
270 "fixed_string": self.fixed_string.unwrap_or(false),
271 "include_patterns": self.include_patterns,
272 "exclude_patterns": self.exclude_patterns,
273 "respect_gitignore": self.respect_gitignore.unwrap_or(true),
274 "use_default_excludes": self.use_default_excludes.unwrap_or(true),
275 "context_lines": self.context_lines,
276 "before_context_lines": self.before_context_lines,
277 "after_context_lines": self.after_context_lines,
278 "include_snippet": self.include_snippet.unwrap_or(true),
279 "snippet_length": self.snippet_length
280 })
281 }
282}
283
284impl PaginationParams for RegexSearchRequest {
285 fn get_page_size(&self) -> Option<usize> {
286 self.page_size
287 }
288 fn get_include_snippet(&self) -> Option<bool> {
289 self.include_snippet
290 }
291 fn get_snippet_length(&self) -> Option<usize> {
292 self.snippet_length
293 }
294 fn get_context_lines(&self) -> Option<usize> {
295 Some(self.context.unwrap_or(0))
296 }
297 fn get_search_mode(&self) -> String {
298 "regex".to_string()
299 }
300 fn get_query(&self) -> String {
301 self.pattern.clone()
302 }
303 fn get_search_params(&self) -> serde_json::Value {
304 json!({
305 "ignore_case": self.ignore_case.unwrap_or(false),
306 "context_lines": self.context.unwrap_or(0),
307 "whole_word": self.whole_word.unwrap_or(false),
308 "fixed_string": self.fixed_string.unwrap_or(false),
309 "include_patterns": self.include_patterns,
310 "exclude_patterns": self.exclude_patterns,
311 "respect_gitignore": self.respect_gitignore.unwrap_or(true),
312 "use_default_excludes": self.use_default_excludes.unwrap_or(true),
313 "include_snippet": self.include_snippet.unwrap_or(true),
314 "snippet_length": self.snippet_length
315 })
316 }
317}
318
319impl PaginationParams for HybridSearchRequest {
320 fn get_page_size(&self) -> Option<usize> {
321 self.page_size
322 }
323 fn get_include_snippet(&self) -> Option<bool> {
324 self.include_snippet
325 }
326 fn get_snippet_length(&self) -> Option<usize> {
327 self.snippet_length
328 }
329 fn get_context_lines(&self) -> Option<usize> {
330 self.context_lines
331 }
332 fn get_search_mode(&self) -> String {
333 "hybrid".to_string()
334 }
335 fn get_query(&self) -> String {
336 self.query.clone()
337 }
338 fn get_search_params(&self) -> serde_json::Value {
339 json!({
340 "top_k": self.top_k,
341 "threshold": self.threshold.unwrap_or(0.02),
342 "rerank": self.rerank.unwrap_or(false),
343 "rerank_model": self.rerank_model,
344 "case_insensitive": self.case_insensitive.unwrap_or(false),
345 "whole_word": self.whole_word.unwrap_or(false),
346 "fixed_string": self.fixed_string.unwrap_or(false),
347 "include_patterns": self.include_patterns,
348 "exclude_patterns": self.exclude_patterns,
349 "respect_gitignore": self.respect_gitignore.unwrap_or(true),
350 "use_default_excludes": self.use_default_excludes.unwrap_or(true),
351 "context_lines": self.context_lines,
352 "before_context_lines": self.before_context_lines,
353 "after_context_lines": self.after_context_lines,
354 "include_snippet": self.include_snippet.unwrap_or(true),
355 "snippet_length": self.snippet_length
356 })
357 }
358}
359
360impl PaginationParams for LexicalSearchRequest {
361 fn get_page_size(&self) -> Option<usize> {
362 self.page_size
363 }
364 fn get_include_snippet(&self) -> Option<bool> {
365 self.include_snippet
366 }
367 fn get_snippet_length(&self) -> Option<usize> {
368 self.snippet_length
369 }
370 fn get_context_lines(&self) -> Option<usize> {
371 self.context_lines
372 }
373 fn get_search_mode(&self) -> String {
374 "lexical".to_string()
375 }
376 fn get_query(&self) -> String {
377 self.query.clone()
378 }
379 fn get_search_params(&self) -> serde_json::Value {
380 json!({
381 "top_k": self.top_k,
382 "threshold": self.threshold,
383 "case_insensitive": self.case_insensitive.unwrap_or(false),
384 "whole_word": self.whole_word.unwrap_or(false),
385 "fixed_string": self.fixed_string.unwrap_or(false),
386 "include_patterns": self.include_patterns,
387 "exclude_patterns": self.exclude_patterns,
388 "respect_gitignore": self.respect_gitignore.unwrap_or(true),
389 "use_default_excludes": self.use_default_excludes.unwrap_or(true),
390 "context_lines": self.context_lines,
391 "before_context_lines": self.before_context_lines,
392 "after_context_lines": self.after_context_lines,
393 "include_snippet": self.include_snippet.unwrap_or(true),
394 "snippet_length": self.snippet_length
395 })
396 }
397}
398
/// MCP server for ck: pairs the shared request context with the table of
/// registered tool routes.
///
/// The type is `Clone`; `run` serves a clone of itself over stdio.
#[derive(Clone)]
pub struct CkMcpServer {
    /// Shared server state (working directory, session manager, ...).
    context: McpContext,
    /// Tool name -> route table consulted by `call_tool` and `list_tools`.
    tool_router: ToolRouter<Self>,
}
404
405impl ServerHandler for CkMcpServer {
406 fn get_info(&self) -> InitializeResult {
407 InitializeResult {
408 protocol_version: ProtocolVersion::V_2024_11_05,
409 server_info: Implementation {
410 name: "ck".to_string(),
411 version: env!("CARGO_PKG_VERSION").to_string(),
412 title: Some("CK Semantic Search Server".to_string()),
413 website_url: Some("https://github.com/BeaconBay/ck".to_string()),
414 icons: None,
415 },
416 capabilities: rmcp::model::ServerCapabilities {
417 tools: Some(ToolsCapability {
418 list_changed: Some(false),
419 }),
420 ..Default::default()
421 },
422 instructions: Some(r#"CK is a semantic code search engine that helps you find code by meaning, not just text matching.
423
424## Available Tools:
425
426- **semantic_search**: Find code by describing what it does, not exact text. Best for conceptual searches like "function that handles authentication" or "code that processes payments"
427- **regex_search**: Traditional pattern matching. Use for exact text, symbols, or specific code patterns
428- **hybrid_search**: Combines semantic and regex search with RRF ranking. Best when you want both conceptual matches and specific keywords
429- **index_status**: Check if a directory is indexed and ready for semantic search
430- **reindex**: Force rebuild of the semantic index when code has changed
431- **health_check**: Verify the server is running and responsive
432
433## Usage Tips:
434
4351. Semantic search works best with natural language queries describing functionality
4362. The first semantic search in a directory triggers automatic indexing
4373. Use regex_search for exact matches, variable names, or specific syntax
4384. Hybrid search is ideal when you know some keywords but want related code too
4395. All searches respect .gitignore by default
4406. Use pagination parameters to control result size and prevent large token responses
441
442## Pagination Parameters:
443
444All search tools support:
445- **page_size** (default: 50, max: 200) - Results per page
446- **include_snippet** (default: true) - Include code snippets
447- **snippet_length** (default: 500) - Max characters per snippet
448- **cursor** - Opaque cursor for subsequent pages
449- **context_lines** - Lines of context (semantic/hybrid only)
450
451## Examples:
452
453- Semantic: "error handling for database connections"
454- Regex: "async fn.*handle_request"
455- Hybrid: "authentication login" (finds both exact matches and conceptually related code)
456- Paginated: Use page_size=25 and follow next_cursor for large result sets"#.to_string()),
457 }
458 }
459
460 async fn call_tool(
461 &self,
462 request: CallToolRequestParam,
463 context: RequestContext<RoleServer>,
464 ) -> Result<CallToolResult, ErrorData> {
465 let tool_context = ToolCallContext::new(self, request, context);
466 if let Some(route) = self.tool_router.map.get(&tool_context.name) {
467 (route.call)(tool_context).await
468 } else {
469 Err(ErrorData::method_not_found::<
470 rmcp::model::CallToolRequestMethod,
471 >())
472 }
473 }
474
475 async fn list_tools(
476 &self,
477 _request: Option<PaginatedRequestParam>,
478 _context: RequestContext<RoleServer>,
479 ) -> Result<ListToolsResult, ErrorData> {
480 let tools: Vec<Tool> = self
481 .tool_router
482 .map
483 .values()
484 .map(|route| route.attr.clone())
485 .collect();
486 Ok(ListToolsResult {
487 tools,
488 next_cursor: None,
489 })
490 }
491}
492
493impl CkMcpServer {
494 pub fn new(cwd: PathBuf) -> Result<Self> {
495 let context = McpContext::new(cwd)?;
496 let tool_router = Self::create_tool_router();
497 Ok(Self {
498 context,
499 tool_router,
500 })
501 }
502
503 fn extract_pagination_config(
505 page_size: Option<usize>,
506 include_snippet: Option<bool>,
507 snippet_length: Option<usize>,
508 context_lines: Option<usize>,
509 ) -> PaginationConfig {
510 PaginationConfig {
511 page_size: page_size.unwrap_or(50),
512 include_snippet: include_snippet.unwrap_or(true),
513 snippet_length: snippet_length.unwrap_or(500),
514 context_lines: context_lines.unwrap_or(0),
515 }
516 .validate()
517 }
518
519 fn search_page_to_json(
521 page: SearchPage,
522 query: &str,
523 mode: &str,
524 search_params: serde_json::Value,
525 search_time_ms: u64,
526 ) -> serde_json::Value {
527 let results: Vec<serde_json::Value> = page.matches.iter().map(|result| {
528 let match_type = format!("{mode}_match");
529 let mut match_obj = json!({
530 "file": {
531 "path": result.file.to_string_lossy(),
532 "language": result.lang.as_ref().map(std::string::ToString::to_string).unwrap_or("unknown".to_string())
533 },
534 "match": {
535 "span": {
536 "byte_start": result.span.byte_start,
537 "byte_end": result.span.byte_end,
538 "line_start": result.span.line_start,
539 "line_end": result.span.line_end
540 },
541 "content": result.preview
542 },
543 "type": match_type
544 });
545
546 if mode == "semantic" || mode == "hybrid" {
548 match_obj["match"]["score"] = json!(result.score);
549 if mode == "hybrid" {
550 match_obj["match"]["rrf_score"] = json!(result.score);
551 }
552 }
553
554 match_obj["match"]["line_number"] = json!(result.span.line_start);
555
556 match_obj
557 }).collect();
558
559 json!({
560 "search": {
561 "query": query,
562 "mode": mode,
563 "parameters": search_params
564 },
565 "results": {
566 "matches": results,
567 "count": page.count,
568 "total_count": page.total_count,
569 "has_more": page.has_more,
570 "truncated": page.truncated
571 },
572 "pagination": {
573 "next_cursor": page.next_cursor,
574 "page_size": page.original_page_size,
575 "current_page": page.current_page
576 },
577 "metadata": {
578 "search_time_ms": search_time_ms,
579 "index_stats": null }
581 })
582 }
583
584 async fn handle_paginated_request<T>(
586 &self,
587 cursor: &str,
588 request: &T,
589 ) -> Result<(String, Value), ErrorData>
590 where
591 T: PaginationParams,
592 {
593 let config = Self::extract_pagination_config(
594 request.get_page_size(),
595 request.get_include_snippet(),
596 request.get_snippet_length(),
597 request.get_context_lines(),
598 );
599
600 let page = self
601 .context
602 .session_manager
603 .get_page_by_cursor(cursor, config)
604 .await
605 .map_err(|e| ErrorData::invalid_params(e, None))?;
606
607 let mode = request.get_search_mode();
608 let query = request.get_query();
609 let search_params = request.get_search_params();
610
611 let structured_result = Self::search_page_to_json(page, &query, &mode, search_params, 0);
612
613 let summary = format!(
614 "Retrieved page {} of {} search results for '{}'",
615 structured_result["pagination"]["current_page"], mode, query
616 );
617
618 Ok((summary, structured_result))
619 }
620
621 fn create_tool_router() -> ToolRouter<Self> {
622 let mut router = ToolRouter::new();
623 router.add_route(Self::health_check_route());
624 router.add_route(Self::semantic_search_route());
625 router.add_route(Self::lexical_search_route());
626 router.add_route(Self::regex_search_route());
627 router.add_route(Self::hybrid_search_route());
628 router.add_route(Self::index_status_route());
629 router.add_route(Self::reindex_route());
630 router.add_route(Self::default_ckignore_route());
631 router
632 }
633
634 fn default_ckignore_route() -> ToolRoute<Self> {
635 let input_schema = serde_json::json!({
636 "$schema": "https://json-schema.org/draft/2020-12/schema",
637 "type": "object",
638 "properties": {},
639 "additionalProperties": false,
640 });
641
642 let tool = Tool {
643 name: "default_ckignore".into(),
644 title: Some("Default .ckignore".into()),
645 description: Some("Retrieve the default .ckignore content generated by ck".into()),
646 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
647 output_schema: None,
648 annotations: None,
649 icons: None,
650 };
651
652 ToolRoute::new_dyn(tool, |_context: ToolCallContext<'_, CkMcpServer>| {
653 Box::pin(async move {
654 let content = get_default_ckignore_content();
655 let structured = json!({
656 "ckignore": content,
657 "length": content.lines().count(),
658 });
659 let summary = "Default .ckignore patterns for ck".to_string();
660
661 Ok(CallToolResult {
662 content: vec![
663 Content::text(summary.clone()),
664 Content::json(structured.clone())
665 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
666 ],
667 structured_content: Some(structured),
668 is_error: Some(false),
669 meta: None,
670 })
671 })
672 })
673 }
674
675 fn health_check_route() -> ToolRoute<Self> {
676 let input_schema = serde_json::json!({
677 "$schema": "https://json-schema.org/draft/2020-12/schema",
678 "type": "object",
679 "properties": {},
680 "additionalProperties": false,
681 });
682 let tool = Tool {
683 name: "health_check".into(),
684 title: Some("Health Check".into()),
685 description: Some("Health check tool to verify server status".into()),
686 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
687 output_schema: None,
688 annotations: None,
689 icons: None,
690 };
691
692 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
693 Box::pin(async move {
694 let status_data = json!({
695 "status": "healthy",
696 "server": "ck",
697 "version": env!("CARGO_PKG_VERSION"),
698 "protocol": "mcp",
699 "timestamp": chrono::Utc::now().to_rfc3339(),
700 "cwd": context.service.context.cwd.to_string_lossy()
701 });
702
703 let summary = format!(
704 "CK Semantic Search Server v{} is healthy and ready (MCP protocol, working directory: {})",
705 env!("CARGO_PKG_VERSION"),
706 context.service.context.cwd.to_string_lossy()
707 );
708
709 Ok(CallToolResult {
710 content: vec![
711 Content::text(summary),
712 Content::json(status_data.clone())
713 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
714 ],
715 structured_content: Some(status_data),
716 is_error: Some(false),
717 meta: None,
718 })
719 })
720 })
721 }
722
723 fn semantic_search_route() -> ToolRoute<Self> {
724 let schema = schemars::schema_for!(SemanticSearchRequest);
725 let input_schema = serde_json::to_value(schema).unwrap();
726 let tool = Tool {
727 name: "semantic_search".into(),
728 title: Some("Semantic Search".into()),
729 description: Some("Search for code semantically using embeddings".into()),
730 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
731 output_schema: None,
732 annotations: None,
733 icons: None,
734 };
735
736 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
737 Box::pin(async move {
738 let arguments = context.arguments.clone().unwrap_or_default();
739 let request: SemanticSearchRequest =
740 serde_json::from_value(serde_json::Value::Object(arguments)).map_err(|e| {
741 rmcp::ErrorData::invalid_params(format!("Invalid parameters: {e}"), None)
742 })?;
743
744 let service: &CkMcpServer = context.service;
745 let meta = context.request_context.meta.clone();
746 let peer = context.request_context.peer;
747 match service
748 .handle_semantic_search(request, Some(meta), Some(peer))
749 .await
750 {
751 Ok((summary, result)) => Ok(CallToolResult {
752 content: vec![
753 Content::text(summary),
754 Content::json(result.clone())
755 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
756 ],
757 structured_content: Some(result),
758 is_error: Some(false),
759 meta: None,
760 }),
761 Err(e) => Err(e),
762 }
763 })
764 })
765 }
766
767 fn regex_search_route() -> ToolRoute<Self> {
768 let schema = schemars::schema_for!(RegexSearchRequest);
769 let input_schema = serde_json::to_value(schema).unwrap();
770 let tool = Tool {
771 name: "regex_search".into(),
772 title: Some("Regex Search".into()),
773 description: Some("Search for code using regular expressions (grep-style)".into()),
774 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
775 output_schema: None,
776 annotations: None,
777 icons: None,
778 };
779
780 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
781 Box::pin(async move {
782 let arguments = context.arguments.clone().unwrap_or_default();
783 let request: RegexSearchRequest =
784 serde_json::from_value(serde_json::Value::Object(arguments)).map_err(|e| {
785 rmcp::ErrorData::invalid_params(format!("Invalid parameters: {e}"), None)
786 })?;
787
788 let service: &CkMcpServer = context.service;
789 match service.handle_regex_search(request).await {
790 Ok((summary, result)) => Ok(CallToolResult {
791 content: vec![
792 Content::text(summary),
793 Content::json(result.clone())
794 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
795 ],
796 structured_content: Some(result),
797 is_error: Some(false),
798 meta: None,
799 }),
800 Err(e) => Err(e),
801 }
802 })
803 })
804 }
805
806 fn lexical_search_route() -> ToolRoute<Self> {
807 let schema = schemars::schema_for!(LexicalSearchRequest);
808 let input_schema = serde_json::to_value(schema).unwrap();
809 let tool = Tool {
810 name: "lexical_search".into(),
811 title: Some("Lexical Search".into()),
812 description: Some("BM25 lexical search".into()),
813 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
814 output_schema: None,
815 annotations: None,
816 icons: None,
817 };
818
819 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
820 Box::pin(async move {
821 let arguments = context.arguments.clone().unwrap_or_default();
822 let request: LexicalSearchRequest =
823 serde_json::from_value(serde_json::Value::Object(arguments)).map_err(|e| {
824 rmcp::ErrorData::invalid_params(format!("Invalid parameters: {e}"), None)
825 })?;
826
827 let service: &CkMcpServer = context.service;
828 match service.handle_lexical_search(request).await {
829 Ok((summary, result)) => Ok(CallToolResult {
830 content: vec![
831 Content::text(summary),
832 Content::json(result.clone())
833 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
834 ],
835 structured_content: Some(result),
836 is_error: Some(false),
837 meta: None,
838 }),
839 Err(e) => Err(e),
840 }
841 })
842 })
843 }
844
845 fn hybrid_search_route() -> ToolRoute<Self> {
846 let schema = schemars::schema_for!(HybridSearchRequest);
847 let input_schema = serde_json::to_value(schema).unwrap();
848 let tool = Tool {
849 name: "hybrid_search".into(),
850 title: Some("Hybrid Search".into()),
851 description: Some(
852 "Hybrid search combining regex and semantic search with RRF ranking".into(),
853 ),
854 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
855 output_schema: None,
856 annotations: None,
857 icons: None,
858 };
859
860 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
861 Box::pin(async move {
862 let arguments = context.arguments.clone().unwrap_or_default();
863 let request: HybridSearchRequest =
864 serde_json::from_value(serde_json::Value::Object(arguments)).map_err(|e| {
865 rmcp::ErrorData::invalid_params(format!("Invalid parameters: {e}"), None)
866 })?;
867
868 let service: &CkMcpServer = context.service;
869 match service.handle_hybrid_search(request).await {
870 Ok((summary, result)) => Ok(CallToolResult {
871 content: vec![
872 Content::text(summary),
873 Content::json(result.clone())
874 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
875 ],
876 structured_content: Some(result),
877 is_error: Some(false),
878 meta: None,
879 }),
880 Err(e) => Err(e),
881 }
882 })
883 })
884 }
885
886 fn index_status_route() -> ToolRoute<Self> {
887 let schema = schemars::schema_for!(IndexStatusRequest);
888 let input_schema = serde_json::to_value(schema).unwrap();
889 let tool = Tool {
890 name: "index_status".into(),
891 title: Some("Index Status".into()),
892 description: Some("Get information about the index status for a directory".into()),
893 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
894 output_schema: None,
895 annotations: None,
896 icons: None,
897 };
898
899 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
900 Box::pin(async move {
901 let arguments = context.arguments.clone().unwrap_or_default();
902 let request: IndexStatusRequest =
903 serde_json::from_value(serde_json::Value::Object(arguments)).map_err(|e| {
904 rmcp::ErrorData::invalid_params(format!("Invalid parameters: {e}"), None)
905 })?;
906
907 let service: &CkMcpServer = context.service;
908 let meta = context.request_context.meta.clone();
909 let peer = context.request_context.peer;
910 match service
911 .handle_index_status(request, Some(meta), Some(peer))
912 .await
913 {
914 Ok((summary, result)) => Ok(CallToolResult {
915 content: vec![
916 Content::text(summary),
917 Content::json(result.clone())
918 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
919 ],
920 structured_content: Some(result),
921 is_error: Some(false),
922 meta: None,
923 }),
924 Err(e) => Err(e),
925 }
926 })
927 })
928 }
929
930 fn reindex_route() -> ToolRoute<Self> {
931 let schema = schemars::schema_for!(ReindexRequest);
932 let input_schema = serde_json::to_value(schema).unwrap();
933 let tool = Tool {
934 name: "reindex".into(),
935 title: Some("Reindex Directory".into()),
936 description: Some("Force reindexing of a directory with progress tracking".into()),
937 input_schema: Arc::new(input_schema.as_object().unwrap().clone()),
938 output_schema: None,
939 annotations: None,
940 icons: None,
941 };
942
943 ToolRoute::new_dyn(tool, |context: ToolCallContext<'_, CkMcpServer>| {
944 Box::pin(async move {
945 let arguments = context.arguments.clone().unwrap_or_default();
946 let request: ReindexRequest =
947 serde_json::from_value(serde_json::Value::Object(arguments)).map_err(|e| {
948 rmcp::ErrorData::invalid_params(format!("Invalid parameters: {e}"), None)
949 })?;
950
951 let service: &CkMcpServer = context.service;
952 let meta = context.request_context.meta.clone();
953 let peer = context.request_context.peer;
954 match service
955 .handle_reindex(request, Some(meta), Some(peer))
956 .await
957 {
958 Ok((summary, result)) => Ok(CallToolResult {
959 content: vec![
960 Content::text(summary),
961 Content::json(result.clone())
962 .map_err(|e| ErrorData::internal_error(e.to_string(), None))?,
963 ],
964 structured_content: Some(result),
965 is_error: Some(false),
966 meta: None,
967 }),
968 Err(e) => Err(e),
969 }
970 })
971 })
972 }
973
974 pub async fn run(&self) -> Result<()> {
975 info!("Starting ck MCP server");
976
977 let stdio_transport = transport::stdio();
978 let running_service = self.clone().serve(stdio_transport).await?;
979 running_service.waiting().await?;
980 Ok(())
981 }
982
983 pub async fn handle_semantic_search(
984 &self,
985 request: SemanticSearchRequest,
986 meta: Option<Meta>,
987 peer: Option<Peer<RoleServer>>,
988 ) -> Result<(String, Value), ErrorData> {
989 if let Some(cursor) = &request.cursor {
991 return self.handle_paginated_request(cursor, &request).await;
992 }
993
994 let query = request.query.clone();
995 let path = request.path;
996 let top_k = request.top_k;
997 let threshold = request.threshold;
998 let path_buf = self.context.resolve_request_path(&path)?;
1000 let search_root = if path_buf.is_dir() {
1001 path_buf.clone()
1002 } else {
1003 path_buf
1004 .parent()
1005 .map(std::path::Path::to_path_buf)
1006 .unwrap_or_else(|| PathBuf::from("."))
1007 };
1008
1009 let respect_gitignore = request.respect_gitignore.unwrap_or(true);
1010 let use_default_excludes = request.use_default_excludes.unwrap_or(true);
1011 let exclude_patterns =
1012 resolve_exclude_patterns(request.exclude_patterns.clone(), Some(use_default_excludes));
1013 let include_patterns = resolve_include_patterns(
1014 &search_root,
1015 request.include_patterns.clone(),
1016 &exclude_patterns,
1017 )?;
1018
1019 let query_clone = query.clone();
1021 let path_clone = path_buf.clone();
1022
1023 let config = Self::extract_pagination_config(
1025 request.page_size,
1026 request.include_snippet,
1027 request.snippet_length,
1028 request.context_lines,
1029 );
1030
1031 let indexing_progress_callback = if let (Some(meta), Some(peer)) = (&meta, &peer) {
1033 if let Some(progress_token) = meta.get_progress_token() {
1034 let token = progress_token.clone();
1035 let peer = peer.clone();
1036 let step_count = Arc::new(AtomicUsize::new(0));
1037 Some(Box::new(move |message: &str| {
1038 let token = token.clone();
1039 let peer = peer.clone();
1040 let message = message.to_string();
1041 let current_step = step_count.fetch_add(1, Ordering::SeqCst) + 1;
1042 tokio::spawn(async move {
1043 let _ = peer
1044 .notify_progress(ProgressNotificationParam {
1045 progress_token: token,
1046 progress: current_step as f64,
1047 total: None, message: Some(message),
1049 })
1050 .await;
1051 });
1052 }) as ck_engine::IndexingProgressCallback)
1053 } else {
1054 None
1055 }
1056 } else {
1057 None
1058 };
1059
1060 let include_snippet = request.include_snippet.unwrap_or(true);
1061 let context_lines = request.context_lines.unwrap_or(0);
1062 let before_context_lines = request.before_context_lines.unwrap_or(context_lines);
1063 let after_context_lines = request.after_context_lines.unwrap_or(context_lines);
1064
1065 let options = SearchOptions {
1066 mode: SearchMode::Semantic,
1067 query,
1068 path: path_buf,
1069 top_k: top_k.or(Some(DEFAULT_MCP_TOP_K)),
1070 threshold: threshold.or(Some(0.6)),
1071 case_insensitive: request.case_insensitive.unwrap_or(false),
1072 whole_word: request.whole_word.unwrap_or(false),
1073 fixed_string: request.fixed_string.unwrap_or(false),
1074 line_numbers: false,
1075 context_lines,
1076 before_context_lines,
1077 after_context_lines,
1078 recursive: true,
1079 json_output: false,
1080 jsonl_output: true,
1081 no_snippet: !include_snippet,
1082 reindex: false,
1083 show_scores: true,
1084 show_filenames: true,
1085 files_with_matches: false,
1086 files_without_matches: false,
1087 exclude_patterns,
1088 include_patterns,
1089 respect_gitignore,
1090 use_ckignore: true,
1091 full_section: false,
1092 rerank: request.rerank.unwrap_or(false),
1093 rerank_model: request.rerank_model.clone(),
1094 embedding_model: None,
1095 };
1096
1097 let mut indexing_progress_callback = indexing_progress_callback;
1102 let mut effective_mode: Option<String> = None;
1103 let started = Instant::now();
1104 let search_results = match ck_engine::search_enhanced_with_indexing_progress(
1105 &options,
1106 None,
1107 indexing_progress_callback.take(),
1108 None,
1109 )
1110 .await
1111 {
1112 Ok(results) => results,
1113 Err(e) => {
1114 let message = e.to_string();
1115 if message.contains("No embeddings found") {
1116 tracing::warn!(
1117 "semantic search missing embeddings, attempting reindex: {}",
1118 message
1119 );
1120 let mut reindex_options = options.clone();
1121 reindex_options.reindex = true;
1122 match ck_engine::search_enhanced_with_indexing_progress(
1123 &reindex_options,
1124 None,
1125 None,
1126 None,
1127 )
1128 .await
1129 {
1130 Ok(results) => results,
1131 Err(retry_err) => {
1132 tracing::warn!("semantic search failed after reindex: {}", retry_err);
1133 let mut fallback_options = options.clone();
1135 fallback_options.mode = SearchMode::Lexical;
1136 fallback_options.reindex = true;
1137 match ck_engine::search_enhanced_with_indexing_progress(
1138 &fallback_options,
1139 None,
1140 None,
1141 None,
1142 )
1143 .await
1144 {
1145 Ok(mut lexical_results) => {
1146 if let Some(limit) = top_k {
1147 lexical_results
1148 .matches
1149 .truncate(limit.min(lexical_results.matches.len()));
1150 }
1151 effective_mode =
1152 Some("semantic (lexical fallback)".to_string());
1153 lexical_results
1154 }
1155 Err(final_err) => {
1156 return Err(ErrorData::internal_error(
1157 final_err.to_string(),
1158 None,
1159 ));
1160 }
1161 }
1162 }
1163 }
1164 } else {
1165 tracing::warn!("semantic search failed: {}", message);
1166 return Err(ErrorData::internal_error(message, None));
1167 }
1168 }
1169 };
1170 let elapsed_ms = started.elapsed().as_millis() as u64;
1171
1172 let page = self
1174 .context
1175 .session_manager
1176 .get_first_page(
1177 options,
1178 filter_valid_results(search_results.matches),
1179 config,
1180 )
1181 .await
1182 .map_err(|e| ErrorData::internal_error(e, None))?;
1183
1184 let search_params = json!({
1185 "top_k": top_k.unwrap_or(DEFAULT_MCP_TOP_K),
1186 "threshold": threshold.unwrap_or(0.6)
1187 });
1188
1189 let current_page = page.current_page;
1190 let mut structured_result =
1191 Self::search_page_to_json(page, &query_clone, "semantic", search_params, elapsed_ms);
1192
1193 if let Some(ref note) = effective_mode
1194 && let Some(metadata) = structured_result.get_mut("metadata")
1195 {
1196 metadata["fallback"] = json!(note);
1197 }
1198
1199 let summary_suffix = effective_mode
1200 .as_ref()
1201 .map(|s| format!(" [{s}]"))
1202 .unwrap_or_default();
1203
1204 let summary = format!(
1205 "Semantic search for '{}' found {} matches in {} (threshold: {:.2}, top_k: {}) - Page {}{}",
1206 query_clone,
1207 structured_result["results"]["count"],
1208 path_clone.display(),
1209 threshold.unwrap_or(0.6),
1210 top_k.unwrap_or(DEFAULT_MCP_TOP_K),
1211 current_page,
1212 summary_suffix
1213 );
1214
1215 Ok((summary, structured_result))
1216 }
1217
1218 pub async fn handle_lexical_search(
1219 &self,
1220 request: LexicalSearchRequest,
1221 ) -> Result<(String, Value), ErrorData> {
1222 if let Some(cursor) = &request.cursor {
1223 return self.handle_paginated_request(cursor, &request).await;
1224 }
1225
1226 let query = request.query.clone();
1227 let path = request.path;
1228 let top_k = request.top_k;
1229 let threshold = request.threshold;
1230 let path_buf = self.context.resolve_request_path(&path)?;
1232 let search_root = if path_buf.is_dir() {
1233 path_buf.clone()
1234 } else {
1235 path_buf
1236 .parent()
1237 .map(std::path::Path::to_path_buf)
1238 .unwrap_or_else(|| PathBuf::from("."))
1239 };
1240
1241 let respect_gitignore = request.respect_gitignore.unwrap_or(true);
1242 let use_default_excludes = request.use_default_excludes.unwrap_or(true);
1243 let exclude_patterns =
1244 resolve_exclude_patterns(request.exclude_patterns.clone(), Some(use_default_excludes));
1245 let include_patterns = resolve_include_patterns(
1246 &search_root,
1247 request.include_patterns.clone(),
1248 &exclude_patterns,
1249 )?;
1250
1251 let query_clone = query.clone();
1252 let path_clone = path_buf.clone();
1253
1254 let config = Self::extract_pagination_config(
1255 request.page_size,
1256 request.include_snippet,
1257 request.snippet_length,
1258 request.context_lines,
1259 );
1260
1261 let include_snippet = request.include_snippet.unwrap_or(true);
1262 let context_lines = request.context_lines.unwrap_or(0);
1263 let before_context_lines = request.before_context_lines.unwrap_or(context_lines);
1264 let after_context_lines = request.after_context_lines.unwrap_or(context_lines);
1265
1266 let options = SearchOptions {
1267 mode: SearchMode::Lexical,
1268 query,
1269 path: path_buf,
1270 top_k,
1271 threshold,
1272 case_insensitive: request.case_insensitive.unwrap_or(false),
1273 whole_word: request.whole_word.unwrap_or(false),
1274 fixed_string: request.fixed_string.unwrap_or(false),
1275 line_numbers: false,
1276 context_lines,
1277 before_context_lines,
1278 after_context_lines,
1279 recursive: true,
1280 json_output: false,
1281 jsonl_output: true,
1282 no_snippet: !include_snippet,
1283 reindex: false,
1284 show_scores: true,
1285 show_filenames: true,
1286 files_with_matches: false,
1287 files_without_matches: false,
1288 exclude_patterns,
1289 include_patterns,
1290 respect_gitignore,
1291 use_ckignore: true,
1292 full_section: false,
1293 rerank: false,
1294 rerank_model: None,
1295 embedding_model: None,
1296 };
1297
1298 let started = Instant::now();
1299 let search_results =
1300 match ck_engine::search_enhanced_with_indexing_progress(&options, None, None, None)
1301 .await
1302 {
1303 Ok(results) => results,
1304 Err(e) => return Err(ErrorData::internal_error(e.to_string(), None)),
1305 };
1306 let elapsed_ms = started.elapsed().as_millis() as u64;
1307
1308 let page = self
1309 .context
1310 .session_manager
1311 .get_first_page(
1312 options,
1313 filter_valid_results(search_results.matches),
1314 config,
1315 )
1316 .await
1317 .map_err(|e| ErrorData::internal_error(e, None))?;
1318
1319 let search_params = json!({
1320 "top_k": top_k,
1321 "threshold": threshold
1322 });
1323
1324 let current_page = page.current_page;
1325 let structured_result =
1326 Self::search_page_to_json(page, &query_clone, "lexical", search_params, elapsed_ms);
1327
1328 let summary = format!(
1329 "Lexical search for '{}' found {} matches in {} (top_k: {}, threshold: {}) - Page {}",
1330 query_clone,
1331 structured_result["results"]["count"],
1332 path_clone.display(),
1333 top_k
1334 .map(|v| v.to_string())
1335 .unwrap_or_else(|| "unbounded".to_string()),
1336 threshold
1337 .map(|v| format!("{v:.3}"))
1338 .unwrap_or_else(|| "n/a".into()),
1339 current_page
1340 );
1341
1342 Ok((summary, structured_result))
1343 }
1344
1345 pub async fn handle_regex_search(
1346 &self,
1347 request: RegexSearchRequest,
1348 ) -> Result<(String, Value), ErrorData> {
1349 if let Some(cursor) = &request.cursor {
1351 return self.handle_paginated_request(cursor, &request).await;
1352 }
1353 let pattern = request.pattern.clone();
1354 let path = request.path;
1355 let ignore_case = request.ignore_case;
1356 let context = request.context;
1357 let path_buf = self.context.resolve_request_path(&path)?;
1359 let search_root = if path_buf.is_dir() {
1360 path_buf.clone()
1361 } else {
1362 path_buf
1363 .parent()
1364 .map(std::path::Path::to_path_buf)
1365 .unwrap_or_else(|| PathBuf::from("."))
1366 };
1367
1368 let respect_gitignore = request.respect_gitignore.unwrap_or(true);
1369 let use_default_excludes = request.use_default_excludes.unwrap_or(true);
1370 let exclude_patterns =
1371 resolve_exclude_patterns(request.exclude_patterns.clone(), Some(use_default_excludes));
1372 let include_patterns = resolve_include_patterns(
1373 &search_root,
1374 request.include_patterns.clone(),
1375 &exclude_patterns,
1376 )?;
1377
1378 let pattern_clone = pattern.clone();
1380 let path_clone = path_buf.clone();
1381
1382 let context_lines = context.unwrap_or(0);
1383
1384 let config = Self::extract_pagination_config(
1386 request.page_size,
1387 request.include_snippet,
1388 request.snippet_length,
1389 Some(context_lines),
1390 );
1391
1392 let include_snippet = request.include_snippet.unwrap_or(true);
1393
1394 let options = SearchOptions {
1395 mode: SearchMode::Regex,
1396 query: pattern,
1397 path: path_buf,
1398 top_k: None, threshold: None, case_insensitive: ignore_case.unwrap_or(false),
1401 whole_word: request.whole_word.unwrap_or(false),
1402 fixed_string: request.fixed_string.unwrap_or(false),
1403 line_numbers: true,
1404 context_lines,
1405 before_context_lines: context_lines,
1406 after_context_lines: context_lines,
1407 recursive: true,
1408 json_output: false,
1409 jsonl_output: true,
1410 no_snippet: !include_snippet,
1411 reindex: false,
1412 show_scores: false, show_filenames: true,
1414 files_with_matches: false,
1415 files_without_matches: false,
1416 exclude_patterns,
1417 include_patterns,
1418 respect_gitignore,
1419 use_ckignore: true,
1420 full_section: false,
1421 rerank: false,
1422 rerank_model: None,
1423 embedding_model: None,
1424 };
1425
1426 let started = Instant::now();
1428 let search_results = match ck_engine::search_enhanced_with_indexing_progress(
1429 &options, None, None, None, )
1433 .await
1434 {
1435 Ok(results) => results,
1436 Err(e) => return Err(ErrorData::internal_error(e.to_string(), None)),
1437 };
1438 let elapsed_ms = started.elapsed().as_millis() as u64;
1439
1440 let page = self
1442 .context
1443 .session_manager
1444 .get_first_page(
1445 options,
1446 filter_valid_results(search_results.matches),
1447 config,
1448 )
1449 .await
1450 .map_err(|e| ErrorData::internal_error(e, None))?;
1451
1452 let search_params = json!({
1453 "ignore_case": ignore_case.unwrap_or(false),
1454 "context_lines": context.unwrap_or(0)
1455 });
1456
1457 let structured_result =
1458 Self::search_page_to_json(page, &pattern_clone, "regex", search_params, elapsed_ms);
1459
1460 let summary = format!(
1461 "Regex search for pattern '{}' found {} matches in {} (case_sensitive: {}, context: {} lines) - Page 1",
1462 pattern_clone,
1463 structured_result["results"]["count"],
1464 path_clone.display(),
1465 !ignore_case.unwrap_or(false),
1466 context.unwrap_or(0)
1467 );
1468
1469 Ok((summary, structured_result))
1470 }
1471
1472 pub async fn handle_hybrid_search(
1473 &self,
1474 request: HybridSearchRequest,
1475 ) -> Result<(String, Value), ErrorData> {
1476 if let Some(cursor) = &request.cursor {
1478 return self.handle_paginated_request(cursor, &request).await;
1479 }
1480 let query = request.query.clone();
1481 let path = request.path;
1482 let top_k = request.top_k;
1483 let threshold = request.threshold;
1484 let path_buf = self.context.resolve_request_path(&path)?;
1486 let search_root = if path_buf.is_dir() {
1487 path_buf.clone()
1488 } else {
1489 path_buf
1490 .parent()
1491 .map(std::path::Path::to_path_buf)
1492 .unwrap_or_else(|| PathBuf::from("."))
1493 };
1494
1495 let respect_gitignore = request.respect_gitignore.unwrap_or(true);
1496 let use_default_excludes = request.use_default_excludes.unwrap_or(true);
1497 let exclude_patterns =
1498 resolve_exclude_patterns(request.exclude_patterns.clone(), Some(use_default_excludes));
1499 let include_patterns = resolve_include_patterns(
1500 &search_root,
1501 request.include_patterns.clone(),
1502 &exclude_patterns,
1503 )?;
1504
1505 let query_clone = query.clone();
1507 let path_clone = path_buf.clone();
1508
1509 let config = Self::extract_pagination_config(
1511 request.page_size,
1512 request.include_snippet,
1513 request.snippet_length,
1514 request.context_lines,
1515 );
1516
1517 let include_snippet = request.include_snippet.unwrap_or(true);
1518 let context_lines = request.context_lines.unwrap_or(0);
1519 let before_context_lines = request.before_context_lines.unwrap_or(context_lines);
1520 let after_context_lines = request.after_context_lines.unwrap_or(context_lines);
1521
1522 let options = SearchOptions {
1523 mode: SearchMode::Hybrid,
1524 query,
1525 path: path_buf,
1526 top_k: top_k.or(Some(DEFAULT_MCP_TOP_K)), threshold: threshold.or(Some(0.02)), case_insensitive: request.case_insensitive.unwrap_or(false),
1529 whole_word: request.whole_word.unwrap_or(false),
1530 fixed_string: request.fixed_string.unwrap_or(false),
1531 line_numbers: false,
1532 context_lines,
1533 before_context_lines,
1534 after_context_lines,
1535 recursive: true,
1536 json_output: false,
1537 jsonl_output: true,
1538 no_snippet: !include_snippet,
1539 reindex: false,
1540 show_scores: true,
1541 show_filenames: true,
1542 files_with_matches: false,
1543 files_without_matches: false,
1544 exclude_patterns,
1545 include_patterns,
1546 respect_gitignore,
1547 use_ckignore: true,
1548 full_section: false,
1549 rerank: request.rerank.unwrap_or(false),
1550 rerank_model: request.rerank_model.clone(),
1551 embedding_model: None,
1552 };
1553
1554 let started = Instant::now();
1556 let search_results = match ck_engine::search_enhanced_with_indexing_progress(
1557 &options, None, None, None, )
1561 .await
1562 {
1563 Ok(results) => results,
1564 Err(e) => return Err(ErrorData::internal_error(e.to_string(), None)),
1565 };
1566 let elapsed_ms = started.elapsed().as_millis() as u64;
1567
1568 let page = self
1570 .context
1571 .session_manager
1572 .get_first_page(
1573 options,
1574 filter_valid_results(search_results.matches),
1575 config,
1576 )
1577 .await
1578 .map_err(|e| ErrorData::internal_error(e, None))?;
1579
1580 let search_params = json!({
1581 "top_k": top_k.unwrap_or(DEFAULT_MCP_TOP_K),
1582 "threshold": threshold.unwrap_or(0.02)
1583 });
1584
1585 let current_page = page.current_page;
1586 let structured_result =
1587 Self::search_page_to_json(page, &query_clone, "hybrid", search_params, elapsed_ms);
1588
1589 let summary = format!(
1590 "Hybrid search for '{}' found {} matches in {} (threshold: {:.3}, top_k: {}, combines semantic + regex) - Page {}",
1591 query_clone,
1592 structured_result["results"]["count"],
1593 path_clone.display(),
1594 threshold.unwrap_or(0.02),
1595 top_k.unwrap_or(DEFAULT_MCP_TOP_K),
1596 current_page
1597 );
1598
1599 Ok((summary, structured_result))
1600 }
1601
1602 async fn handle_index_status(
1603 &self,
1604 request: IndexStatusRequest,
1605 _meta: Option<Meta>,
1606 _peer: Option<Peer<RoleServer>>,
1607 ) -> Result<(String, Value), ErrorData> {
1608 let path = request.path;
1609 let path_buf = self.context.resolve_request_path(&path)?;
1611
1612 let lock = self.context.get_index_lock(&path_buf).await;
1614 let _guard = lock.lock().await;
1615
1616 let index_path = path_buf.join(".ck");
1618 let index_exists = index_path.exists();
1619
1620 let mut index_info = json!({
1621 "path": path_buf.to_string_lossy(),
1622 "index_exists": index_exists,
1623 "index_path": index_path.to_string_lossy(),
1624 });
1625
1626 if index_exists {
1627 if let Ok(metadata) = std::fs::metadata(&index_path) {
1629 index_info["index_size_bytes"] = json!(metadata.len());
1630 index_info["last_modified"] = json!(
1631 metadata
1632 .modified()
1633 .map(|t| t
1634 .duration_since(std::time::UNIX_EPOCH)
1635 .unwrap_or_default()
1636 .as_secs())
1637 .unwrap_or(0)
1638 );
1639 }
1640
1641 if let Some(cached_stats) = self.context.stats_cache.get(&path_buf).await {
1643 index_info["total_files"] = json!(cached_stats.file_count);
1644 index_info["total_chunks"] = json!(cached_stats.chunk_count);
1645 index_info["cache_hit"] = json!(true);
1646 } else if let Ok(index_stats) = ck_index::get_index_stats(&path_buf) {
1647 index_info["total_files"] = json!(index_stats.total_files);
1648 index_info["total_chunks"] = json!(index_stats.total_chunks);
1649 index_info["embedded_chunks"] = json!(index_stats.embedded_chunks);
1650 index_info["total_size_bytes"] = json!(index_stats.total_size_bytes);
1651 index_info["cache_hit"] = json!(false);
1652
1653 let manifest_path = path_buf.join(".ck").join("manifest.json");
1655 if let Ok(data) = std::fs::read(&manifest_path)
1656 && let Ok(manifest) = serde_json::from_slice::<ck_index::IndexManifest>(&data)
1657 && let Some(model_name) = manifest.embedding_model
1658 {
1659 let registry = ck_models::ModelRegistry::default();
1660 let alias = registry
1661 .models
1662 .iter()
1663 .find(|(_, config)| config.name == model_name)
1664 .map(|(alias, _)| alias.clone())
1665 .unwrap_or_else(|| model_name.clone());
1666 let dims = manifest
1667 .embedding_dimensions
1668 .or_else(|| {
1669 registry
1670 .models
1671 .iter()
1672 .find(|(_, config)| config.name == model_name)
1673 .map(|(_, config)| config.dimensions)
1674 })
1675 .unwrap_or(0);
1676
1677 index_info["model"] = json!({
1678 "name": model_name,
1679 "alias": alias,
1680 "dimensions": dims,
1681 });
1682 }
1683
1684 let cache_stats = crate::mcp::cache::IndexStats {
1686 file_count: index_stats.total_files,
1687 chunk_count: index_stats.total_chunks,
1688 model_name: "unknown".to_string(), last_updated: std::time::SystemTime::now(),
1690 is_valid: true,
1691 };
1692 self.context
1693 .stats_cache
1694 .update(path_buf.clone(), cache_stats)
1695 .await;
1696 } else {
1697 let file_count = WalkDir::new(&path_buf)
1699 .into_iter()
1700 .filter_map(std::result::Result::ok)
1701 .filter(|e| e.file_type().is_file())
1702 .count();
1703
1704 index_info["estimated_file_count"] = json!(file_count);
1705 }
1706 }
1707
1708 let structured_result = json!({
1709 "index_status": index_info,
1710 "metadata": {
1711 "checked_at": chrono::Utc::now().to_rfc3339(),
1712 "path_type": if path_buf.is_dir() { "directory" } else { "file" }
1713 }
1714 });
1715
1716 let summary = if index_exists {
1717 let file_count = index_info
1718 .get("total_files")
1719 .or_else(|| index_info.get("estimated_file_count"))
1720 .and_then(serde_json::Value::as_u64)
1721 .unwrap_or(0);
1722 let chunk_count = index_info
1723 .get("total_chunks")
1724 .and_then(serde_json::Value::as_u64)
1725 .unwrap_or(0);
1726
1727 if chunk_count > 0 {
1728 format!(
1729 "Index exists for {} with {} files and {} chunks",
1730 path_buf.display(),
1731 file_count,
1732 chunk_count
1733 )
1734 } else {
1735 format!(
1736 "Index exists for {} with {} files",
1737 path_buf.display(),
1738 file_count
1739 )
1740 }
1741 } else {
1742 format!(
1743 "No index found for {} - indexing would be required for semantic search",
1744 path_buf.display()
1745 )
1746 };
1747
1748 Ok((summary, structured_result))
1749 }
1750
1751 async fn handle_reindex(
1752 &self,
1753 request: ReindexRequest,
1754 meta: Option<Meta>,
1755 peer: Option<Peer<RoleServer>>,
1756 ) -> Result<(String, Value), ErrorData> {
1757 let path = request.path;
1758 let force = request.force.unwrap_or(false);
1759 let path_buf = self.context.resolve_request_path(&path)?;
1761
1762 let lock = self.context.get_index_lock(&path_buf).await;
1764 let _guard = lock.lock().await;
1765
1766 let progress_callback = if let (Some(meta), Some(peer)) = (&meta, &peer) {
1768 if let Some(progress_token) = meta.get_progress_token() {
1769 let token = progress_token.clone();
1770 let peer = peer.clone();
1771 let step_count = Arc::new(AtomicUsize::new(0));
1772 Some(Box::new(move |message: &str| {
1773 let token = token.clone();
1774 let peer = peer.clone();
1775 let message = message.to_string();
1776 let current_step = step_count.fetch_add(1, Ordering::SeqCst) + 1;
1777 tokio::spawn(async move {
1778 let _ = peer
1779 .notify_progress(ProgressNotificationParam {
1780 progress_token: token,
1781 progress: current_step as f64,
1782 total: None, message: Some(message),
1784 })
1785 .await;
1786 });
1787 }) as ck_engine::IndexingProgressCallback)
1788 } else {
1789 None
1790 }
1791 } else {
1792 None
1793 };
1794
1795 let options = SearchOptions {
1797 mode: SearchMode::Semantic, query: String::new(), path: path_buf.clone(),
1800 top_k: None,
1801 threshold: None,
1802 case_insensitive: false,
1803 whole_word: false,
1804 fixed_string: false,
1805 line_numbers: false,
1806 context_lines: 0,
1807 before_context_lines: 0,
1808 after_context_lines: 0,
1809 recursive: true,
1810 json_output: false,
1811 jsonl_output: true,
1812 no_snippet: false,
1813 reindex: force, show_scores: false,
1815 show_filenames: false,
1816 files_with_matches: false,
1817 files_without_matches: false,
1818 exclude_patterns: get_default_exclude_patterns(),
1819 include_patterns: Vec::new(),
1820 respect_gitignore: true,
1821 use_ckignore: true,
1822 full_section: false,
1823 rerank: false,
1824 rerank_model: None,
1825 embedding_model: None,
1826 };
1827
1828 let start_time = std::time::Instant::now();
1830 let reindex_result = match ck_engine::search_enhanced_with_indexing_progress(
1831 &options,
1832 None, progress_callback,
1834 None, )
1836 .await
1837 {
1838 Ok(_) => {
1839 let duration = start_time.elapsed();
1840
1841 self.context.stats_cache.invalidate(&path_buf).await;
1843
1844 json!({
1845 "status": "success",
1846 "duration_ms": duration.as_millis(),
1847 "path": path_buf.to_string_lossy(),
1848 "force": force,
1849 })
1850 }
1851 Err(e) => {
1852 return Err(ErrorData::internal_error(
1853 format!("Reindexing failed: {e}"),
1854 None,
1855 ));
1856 }
1857 };
1858
1859 let structured_result = json!({
1860 "reindex_result": reindex_result,
1861 "metadata": {
1862 "completed_at": chrono::Utc::now().to_rfc3339(),
1863 "path_type": if path_buf.is_dir() { "directory" } else { "file" }
1864 }
1865 });
1866
1867 let summary = format!(
1868 "Successfully reindexed {} in {}ms",
1869 path_buf.display(),
1870 reindex_result.get("duration_ms").unwrap_or(&json!(0))
1871 );
1872
1873 Ok((summary, structured_result))
1874 }
1875}