agentroot_mcp/
tools.rs

1//! MCP tool definitions and handlers
2
3use crate::protocol::*;
4use agentroot_core::{Database, SearchOptions};
5use anyhow::Result;
6use serde_json::Value;
7
8pub fn search_tool_definition() -> ToolDefinition {
9    ToolDefinition {
10        name: "search".to_string(),
11        description: "BM25 full-text search across your knowledge base".to_string(),
12        input_schema: serde_json::json!({
13            "type": "object",
14            "properties": {
15                "query": {
16                    "type": "string",
17                    "description": "Search query (keywords or phrases)"
18                },
19                "limit": {
20                    "type": "integer",
21                    "description": "Maximum results (default: 20)",
22                    "default": 20
23                },
24                "minScore": {
25                    "type": "number",
26                    "description": "Minimum relevance score 0-1 (default: 0)",
27                    "default": 0
28                },
29                "collection": {
30                    "type": "string",
31                    "description": "Filter by collection name"
32                },
33                "provider": {
34                    "type": "string",
35                    "description": "Filter by provider type (file, github, url, etc.)"
36                },
37                "category": {
38                    "type": "string",
39                    "description": "Filter by document category (tutorial, reference, code, config, etc.)"
40                },
41                "difficulty": {
42                    "type": "string",
43                    "description": "Filter by difficulty level (beginner, intermediate, advanced)"
44                },
45                "concept": {
46                    "type": "string",
47                    "description": "Filter by concept/topic"
48                }
49            },
50            "required": ["query"]
51        }),
52    }
53}
54
55pub fn vsearch_tool_definition() -> ToolDefinition {
56    ToolDefinition {
57        name: "vsearch".to_string(),
58        description: "Vector similarity search using embeddings".to_string(),
59        input_schema: serde_json::json!({
60            "type": "object",
61            "properties": {
62                "query": {
63                    "type": "string",
64                    "description": "Search query (natural language)"
65                },
66                "limit": {
67                    "type": "integer",
68                    "description": "Maximum results (default: 20)",
69                    "default": 20
70                },
71                "minScore": {
72                    "type": "number",
73                    "description": "Minimum similarity score 0-1 (default: 0.3)",
74                    "default": 0.3
75                },
76                "collection": {
77                    "type": "string",
78                    "description": "Filter by collection name"
79                },
80                "provider": {
81                    "type": "string",
82                    "description": "Filter by provider type (file, github, url, etc.)"
83                },
84                "category": {
85                    "type": "string",
86                    "description": "Filter by document category (tutorial, reference, code, config, etc.)"
87                },
88                "difficulty": {
89                    "type": "string",
90                    "description": "Filter by difficulty level (beginner, intermediate, advanced)"
91                },
92                "concept": {
93                    "type": "string",
94                    "description": "Filter by concept/topic"
95                }
96            },
97            "required": ["query"]
98        }),
99    }
100}
101
102pub fn query_tool_definition() -> ToolDefinition {
103    ToolDefinition {
104        name: "query".to_string(),
105        description: "Hybrid search with BM25, vectors, and reranking (best quality)".to_string(),
106        input_schema: serde_json::json!({
107            "type": "object",
108            "properties": {
109                "query": {
110                    "type": "string",
111                    "description": "Search query"
112                },
113                "limit": {
114                    "type": "integer",
115                    "description": "Maximum results (default: 20)",
116                    "default": 20
117                },
118                "collection": {
119                    "type": "string",
120                    "description": "Filter by collection name"
121                },
122                "provider": {
123                    "type": "string",
124                    "description": "Filter by provider type (file, github, url, etc.)"
125                },
126                "category": {
127                    "type": "string",
128                    "description": "Filter by document category (tutorial, reference, code, config, etc.)"
129                },
130                "difficulty": {
131                    "type": "string",
132                    "description": "Filter by difficulty level (beginner, intermediate, advanced)"
133                },
134                "concept": {
135                    "type": "string",
136                    "description": "Filter by concept/topic"
137                }
138            },
139            "required": ["query"]
140        }),
141    }
142}
143
144pub fn smart_search_tool_definition() -> ToolDefinition {
145    ToolDefinition {
146        name: "smart_search".to_string(),
147        description: "Intelligent natural language search with automatic query understanding and filtering. Understands temporal filters like 'last hour', metadata filters like 'by Alice', and automatically falls back to BM25 if models are unavailable.".to_string(),
148        input_schema: serde_json::json!({
149            "type": "object",
150            "properties": {
151                "query": {
152                    "type": "string",
153                    "description": "Natural language search query (e.g., 'files edited last hour', 'rust tutorials by Alice')"
154                },
155                "limit": {
156                    "type": "integer",
157                    "description": "Maximum results (default: 20)",
158                    "default": 20
159                },
160                "minScore": {
161                    "type": "number",
162                    "description": "Minimum relevance score 0-1 (default: 0)",
163                    "default": 0
164                },
165                "collection": {
166                    "type": "string",
167                    "description": "Filter by collection name"
168                }
169            },
170            "required": ["query"]
171        }),
172    }
173}
174
175pub fn get_tool_definition() -> ToolDefinition {
176    ToolDefinition {
177        name: "get".to_string(),
178        description: "Get a document by path, docid, or virtual path".to_string(),
179        input_schema: serde_json::json!({
180            "type": "object",
181            "properties": {
182                "file": {
183                    "type": "string",
184                    "description": "File path, docid (#abc123), or agentroot:// URI"
185                },
186                "fromLine": {
187                    "type": "integer",
188                    "description": "Start from line number"
189                },
190                "maxLines": {
191                    "type": "integer",
192                    "description": "Maximum lines to return"
193                },
194                "lineNumbers": {
195                    "type": "boolean",
196                    "description": "Include line numbers",
197                    "default": false
198                }
199            },
200            "required": ["file"]
201        }),
202    }
203}
204
205pub fn multi_get_tool_definition() -> ToolDefinition {
206    ToolDefinition {
207        name: "multi_get".to_string(),
208        description: "Get multiple documents by glob pattern or comma-separated list".to_string(),
209        input_schema: serde_json::json!({
210            "type": "object",
211            "properties": {
212                "pattern": {
213                    "type": "string",
214                    "description": "Glob pattern or comma-separated list of paths/docids"
215                },
216                "maxLines": {
217                    "type": "integer",
218                    "description": "Maximum lines per file"
219                },
220                "maxBytes": {
221                    "type": "integer",
222                    "description": "Skip files larger than this (default: 10240)",
223                    "default": 10240
224                },
225                "lineNumbers": {
226                    "type": "boolean",
227                    "description": "Include line numbers",
228                    "default": false
229                }
230            },
231            "required": ["pattern"]
232        }),
233    }
234}
235
236pub fn status_tool_definition() -> ToolDefinition {
237    ToolDefinition {
238        name: "status".to_string(),
239        description: "Show index status and collection information".to_string(),
240        input_schema: serde_json::json!({
241            "type": "object",
242            "properties": {}
243        }),
244    }
245}
246
247pub fn collection_add_tool_definition() -> ToolDefinition {
248    ToolDefinition {
249        name: "collection_add".to_string(),
250        description: "Add a new collection to index".to_string(),
251        input_schema: serde_json::json!({
252            "type": "object",
253            "properties": {
254                "name": {
255                    "type": "string",
256                    "description": "Collection name"
257                },
258                "path": {
259                    "type": "string",
260                    "description": "Path to local directory or URL"
261                },
262                "pattern": {
263                    "type": "string",
264                    "description": "Glob pattern for files (default: **/*.md)",
265                    "default": "**/*.md"
266                },
267                "provider": {
268                    "type": "string",
269                    "description": "Provider type: file, github, url (default: file)",
270                    "default": "file"
271                },
272                "config": {
273                    "type": "string",
274                    "description": "Provider-specific JSON configuration"
275                }
276            },
277            "required": ["name", "path"]
278        }),
279    }
280}
281
282pub fn collection_remove_tool_definition() -> ToolDefinition {
283    ToolDefinition {
284        name: "collection_remove".to_string(),
285        description: "Remove a collection and its documents".to_string(),
286        input_schema: serde_json::json!({
287            "type": "object",
288            "properties": {
289                "name": {
290                    "type": "string",
291                    "description": "Collection name to remove"
292                }
293            },
294            "required": ["name"]
295        }),
296    }
297}
298
299pub fn collection_update_tool_definition() -> ToolDefinition {
300    ToolDefinition {
301        name: "collection_update".to_string(),
302        description: "Reindex a collection (scan for new/changed documents)".to_string(),
303        input_schema: serde_json::json!({
304            "type": "object",
305            "properties": {
306                "name": {
307                    "type": "string",
308                    "description": "Collection name to update"
309                }
310            },
311            "required": ["name"]
312        }),
313    }
314}
315
316pub async fn handle_search(db: &Database, args: Value) -> Result<ToolResult> {
317    let query = args
318        .get("query")
319        .and_then(|v| v.as_str())
320        .ok_or_else(|| anyhow::anyhow!("Missing query"))?;
321
322    let options = SearchOptions {
323        limit: args.get("limit").and_then(|v| v.as_u64()).unwrap_or(20) as usize,
324        min_score: args.get("minScore").and_then(|v| v.as_f64()).unwrap_or(0.0),
325        collection: args
326            .get("collection")
327            .and_then(|v| v.as_str())
328            .map(String::from),
329        provider: args
330            .get("provider")
331            .and_then(|v| v.as_str())
332            .map(String::from),
333        full_content: false,
334    };
335
336    let mut results = db.search_fts(query, &options)?;
337
338    // Apply metadata filters
339    let category_filter = args.get("category").and_then(|v| v.as_str());
340    let difficulty_filter = args.get("difficulty").and_then(|v| v.as_str());
341    let concept_filter = args.get("concept").and_then(|v| v.as_str());
342
343    if category_filter.is_some() || difficulty_filter.is_some() || concept_filter.is_some() {
344        results.retain(|r| {
345            let matches_category = category_filter.map_or(true, |cat| {
346                r.llm_category
347                    .as_ref()
348                    .map_or(false, |c| c.to_lowercase().contains(&cat.to_lowercase()))
349            });
350            let matches_difficulty = difficulty_filter.map_or(true, |diff| {
351                r.llm_difficulty
352                    .as_ref()
353                    .map_or(false, |d| d.to_lowercase() == diff.to_lowercase())
354            });
355            let matches_concept = concept_filter.map_or(true, |concept| {
356                r.llm_keywords.as_ref().map_or(false, |kws| {
357                    kws.iter()
358                        .any(|kw| kw.to_lowercase().contains(&concept.to_lowercase()))
359                })
360            });
361            matches_category && matches_difficulty && matches_concept
362        });
363    }
364
365    let summary = format!("Found {} results for \"{}\"", results.len(), query);
366    let structured: Vec<Value> = results
367        .iter()
368        .map(|r| {
369            let mut result_json = serde_json::json!({
370                "docid": format!("#{}", r.docid),
371                "file": r.display_path,
372                "title": r.title,
373                "score": (r.score * 100.0).round() / 100.0
374            });
375
376            // Include LLM metadata if available
377            if let Some(summary) = &r.llm_summary {
378                result_json["summary"] = Value::String(summary.clone());
379            }
380            if let Some(category) = &r.llm_category {
381                result_json["category"] = Value::String(category.clone());
382            }
383            if let Some(difficulty) = &r.llm_difficulty {
384                result_json["difficulty"] = Value::String(difficulty.clone());
385            }
386            if let Some(keywords) = &r.llm_keywords {
387                result_json["keywords"] = serde_json::to_value(keywords).unwrap();
388            }
389
390            // Include user metadata if available
391            if let Some(user_meta) = &r.user_metadata {
392                if let Ok(json_str) = user_meta.to_json() {
393                    if let Ok(parsed) = serde_json::from_str::<Value>(&json_str) {
394                        result_json["userMetadata"] = parsed;
395                    }
396                }
397            }
398
399            result_json
400        })
401        .collect();
402
403    Ok(ToolResult {
404        content: vec![Content::Text { text: summary }],
405        structured_content: Some(serde_json::json!({ "results": structured })),
406        is_error: None,
407    })
408}
409
410pub async fn handle_vsearch(db: &Database, args: Value) -> Result<ToolResult> {
411    if !db.has_vector_index() {
412        return Ok(ToolResult {
413            content: vec![Content::Text {
414                text: "Vector index not found. Run 'agentroot embed' first.".to_string(),
415            }],
416            structured_content: None,
417            is_error: Some(true),
418        });
419    }
420
421    let query = args
422        .get("query")
423        .and_then(|v| v.as_str())
424        .ok_or_else(|| anyhow::anyhow!("Missing query"))?;
425
426    let options = SearchOptions {
427        limit: args.get("limit").and_then(|v| v.as_u64()).unwrap_or(20) as usize,
428        min_score: args.get("minScore").and_then(|v| v.as_f64()).unwrap_or(0.3),
429        collection: args
430            .get("collection")
431            .and_then(|v| v.as_str())
432            .map(String::from),
433        provider: args
434            .get("provider")
435            .and_then(|v| v.as_str())
436            .map(String::from),
437        full_content: false,
438    };
439
440    // Try HTTP embedder first, fallback to local
441    let embedder: Box<dyn agentroot_core::Embedder> = if let Ok(http) =
442        agentroot_core::HttpEmbedder::from_env()
443    {
444        Box::new(http)
445    } else if let Ok(local) = agentroot_core::LlamaEmbedder::from_default() {
446        Box::new(local)
447    } else {
448        return Ok(ToolResult {
449                content: vec![Content::Text {
450                    text: "Could not load embedding model. Configure HTTP service via AGENTROOT_EMBEDDING_URL \
451                          or download a local model. See: https://github.com/epappas/agentroot#embedding-models"
452                        .to_string(),
453                }],
454                structured_content: None,
455                is_error: Some(true),
456            });
457    };
458
459    let mut results = db.search_vec(query, embedder.as_ref(), &options).await?;
460
461    // Apply metadata filters
462    let category_filter = args.get("category").and_then(|v| v.as_str());
463    let difficulty_filter = args.get("difficulty").and_then(|v| v.as_str());
464    let concept_filter = args.get("concept").and_then(|v| v.as_str());
465
466    if category_filter.is_some() || difficulty_filter.is_some() || concept_filter.is_some() {
467        results.retain(|r| {
468            let matches_category = category_filter.map_or(true, |cat| {
469                r.llm_category
470                    .as_ref()
471                    .map_or(false, |c| c.to_lowercase().contains(&cat.to_lowercase()))
472            });
473            let matches_difficulty = difficulty_filter.map_or(true, |diff| {
474                r.llm_difficulty
475                    .as_ref()
476                    .map_or(false, |d| d.to_lowercase() == diff.to_lowercase())
477            });
478            let matches_concept = concept_filter.map_or(true, |concept| {
479                r.llm_keywords.as_ref().map_or(false, |kws| {
480                    kws.iter()
481                        .any(|kw| kw.to_lowercase().contains(&concept.to_lowercase()))
482                })
483            });
484            matches_category && matches_difficulty && matches_concept
485        });
486    }
487
488    let summary = format!("Found {} results for \"{}\"", results.len(), query);
489    let structured: Vec<Value> = results
490        .iter()
491        .map(|r| {
492            let mut result_json = serde_json::json!({
493                "docid": format!("#{}", r.docid),
494                "file": r.display_path,
495                "title": r.title,
496                "score": (r.score * 100.0).round() / 100.0
497            });
498
499            // Include LLM metadata if available
500            if let Some(summary) = &r.llm_summary {
501                result_json["summary"] = Value::String(summary.clone());
502            }
503            if let Some(category) = &r.llm_category {
504                result_json["category"] = Value::String(category.clone());
505            }
506            if let Some(difficulty) = &r.llm_difficulty {
507                result_json["difficulty"] = Value::String(difficulty.clone());
508            }
509            if let Some(keywords) = &r.llm_keywords {
510                result_json["keywords"] = serde_json::to_value(keywords).unwrap();
511            }
512
513            // Include user metadata if available
514            if let Some(user_meta) = &r.user_metadata {
515                if let Ok(json_str) = user_meta.to_json() {
516                    if let Ok(parsed) = serde_json::from_str::<Value>(&json_str) {
517                        result_json["userMetadata"] = parsed;
518                    }
519                }
520            }
521
522            result_json
523        })
524        .collect();
525
526    Ok(ToolResult {
527        content: vec![Content::Text { text: summary }],
528        structured_content: Some(serde_json::json!({ "results": structured })),
529        is_error: None,
530    })
531}
532
533pub async fn handle_query(db: &Database, args: Value) -> Result<ToolResult> {
534    if !db.has_vector_index() {
535        return handle_search(db, args).await;
536    }
537
538    let query = args
539        .get("query")
540        .and_then(|v| v.as_str())
541        .ok_or_else(|| anyhow::anyhow!("Missing query"))?;
542
543    let options = SearchOptions {
544        limit: args.get("limit").and_then(|v| v.as_u64()).unwrap_or(20) as usize,
545        min_score: 0.0,
546        collection: args
547            .get("collection")
548            .and_then(|v| v.as_str())
549            .map(String::from),
550        provider: args
551            .get("provider")
552            .and_then(|v| v.as_str())
553            .map(String::from),
554        full_content: false,
555    };
556
557    // Try HTTP embedder first, fallback to local, then BM25-only
558    let embedder: Box<dyn agentroot_core::Embedder> =
559        if let Ok(http) = agentroot_core::HttpEmbedder::from_env() {
560            Box::new(http)
561        } else if let Ok(local) = agentroot_core::LlamaEmbedder::from_default() {
562            Box::new(local)
563        } else {
564            // No embedder available, fall back to BM25-only search
565            return handle_search(db, args).await;
566        };
567
568    let bm25_results = db.search_fts(query, &options)?;
569    let vec_results = db.search_vec(query, embedder.as_ref(), &options).await?;
570
571    let fused_results = agentroot_core::search::rrf_fusion(&bm25_results, &vec_results);
572
573    let mut final_results: Vec<_> = fused_results
574        .into_iter()
575        .filter(|r| r.score >= options.min_score)
576        .take(options.limit)
577        .collect();
578
579    // Apply metadata filters
580    let category_filter = args.get("category").and_then(|v| v.as_str());
581    let difficulty_filter = args.get("difficulty").and_then(|v| v.as_str());
582    let concept_filter = args.get("concept").and_then(|v| v.as_str());
583
584    if category_filter.is_some() || difficulty_filter.is_some() || concept_filter.is_some() {
585        final_results.retain(|r| {
586            let matches_category = category_filter.map_or(true, |cat| {
587                r.llm_category
588                    .as_ref()
589                    .map_or(false, |c| c.to_lowercase().contains(&cat.to_lowercase()))
590            });
591            let matches_difficulty = difficulty_filter.map_or(true, |diff| {
592                r.llm_difficulty
593                    .as_ref()
594                    .map_or(false, |d| d.to_lowercase() == diff.to_lowercase())
595            });
596            let matches_concept = concept_filter.map_or(true, |concept| {
597                r.llm_keywords.as_ref().map_or(false, |kws| {
598                    kws.iter()
599                        .any(|kw| kw.to_lowercase().contains(&concept.to_lowercase()))
600                })
601            });
602            matches_category && matches_difficulty && matches_concept
603        });
604    }
605
606    let summary = format!(
607        "Found {} results for \"{}\" (hybrid search)",
608        final_results.len(),
609        query
610    );
611    let structured: Vec<Value> = final_results
612        .iter()
613        .map(|r| {
614            let mut result_json = serde_json::json!({
615                "docid": format!("#{}", r.docid),
616                "file": r.display_path,
617                "title": r.title,
618                "score": (r.score * 100.0).round() / 100.0
619            });
620
621            // Include LLM metadata if available
622            if let Some(summary) = &r.llm_summary {
623                result_json["summary"] = Value::String(summary.clone());
624            }
625            if let Some(category) = &r.llm_category {
626                result_json["category"] = Value::String(category.clone());
627            }
628            if let Some(difficulty) = &r.llm_difficulty {
629                result_json["difficulty"] = Value::String(difficulty.clone());
630            }
631            if let Some(keywords) = &r.llm_keywords {
632                result_json["keywords"] = serde_json::to_value(keywords).unwrap();
633            }
634
635            // Include user metadata if available
636            if let Some(user_meta) = &r.user_metadata {
637                if let Ok(json_str) = user_meta.to_json() {
638                    if let Ok(parsed) = serde_json::from_str::<Value>(&json_str) {
639                        result_json["userMetadata"] = parsed;
640                    }
641                }
642            }
643
644            result_json
645        })
646        .collect();
647
648    Ok(ToolResult {
649        content: vec![Content::Text { text: summary }],
650        structured_content: Some(serde_json::json!({ "results": structured })),
651        is_error: None,
652    })
653}
654
655pub async fn handle_smart_search(db: &Database, args: Value) -> Result<ToolResult> {
656    let query = args
657        .get("query")
658        .and_then(|v| v.as_str())
659        .ok_or_else(|| anyhow::anyhow!("Missing query"))?;
660
661    let options = SearchOptions {
662        limit: args.get("limit").and_then(|v| v.as_u64()).unwrap_or(20) as usize,
663        min_score: args.get("minScore").and_then(|v| v.as_f64()).unwrap_or(0.0),
664        collection: args
665            .get("collection")
666            .and_then(|v| v.as_str())
667            .map(String::from),
668        provider: None,
669        full_content: false,
670    };
671
672    // Use smart_search which handles parsing and fallbacks
673    let results = agentroot_core::smart_search(db, query, &options).await?;
674
675    let summary = format!(
676        "Found {} results for \"{}\" (smart search)",
677        results.len(),
678        query
679    );
680    let structured: Vec<Value> = results
681        .iter()
682        .map(|r| {
683            let mut result_json = serde_json::json!({
684                "docid": format!("#{}", r.docid),
685                "file": r.display_path,
686                "title": r.title,
687                "score": (r.score * 100.0).round() / 100.0
688            });
689
690            // Include LLM metadata if available
691            if let Some(summary) = &r.llm_summary {
692                result_json["summary"] = Value::String(summary.clone());
693            }
694            if let Some(category) = &r.llm_category {
695                result_json["category"] = Value::String(category.clone());
696            }
697            if let Some(difficulty) = &r.llm_difficulty {
698                result_json["difficulty"] = Value::String(difficulty.clone());
699            }
700            if let Some(keywords) = &r.llm_keywords {
701                result_json["keywords"] = serde_json::to_value(keywords).unwrap();
702            }
703
704            // Include user metadata if available
705            if let Some(user_meta) = &r.user_metadata {
706                if let Ok(json_str) = user_meta.to_json() {
707                    if let Ok(parsed) = serde_json::from_str::<Value>(&json_str) {
708                        result_json["userMetadata"] = parsed;
709                    }
710                }
711            }
712
713            result_json
714        })
715        .collect();
716
717    Ok(ToolResult {
718        content: vec![Content::Text { text: summary }],
719        structured_content: Some(serde_json::json!({ "results": structured })),
720        is_error: None,
721    })
722}
723
724pub async fn handle_get(db: &Database, args: Value) -> Result<ToolResult> {
725    let file = args
726        .get("file")
727        .and_then(|v| v.as_str())
728        .ok_or_else(|| anyhow::anyhow!("Missing file"))?;
729
730    let doc = db
731        .find_by_docid(file)?
732        .ok_or_else(|| anyhow::anyhow!("Document not found: {}", file))?;
733
734    let body = doc.body.unwrap_or_default();
735
736    Ok(ToolResult {
737        content: vec![Content::Resource {
738            resource: ResourceContent {
739                uri: doc.filepath,
740                name: doc.display_path,
741                title: Some(doc.title),
742                mime_type: "text/markdown".to_string(),
743                text: body,
744            },
745        }],
746        structured_content: None,
747        is_error: None,
748    })
749}
750
751pub async fn handle_multi_get(db: &Database, args: Value) -> Result<ToolResult> {
752    let pattern = args
753        .get("pattern")
754        .and_then(|v| v.as_str())
755        .ok_or_else(|| anyhow::anyhow!("Missing pattern"))?;
756
757    let docs = db.fuzzy_find_documents(pattern, 10)?;
758
759    let contents: Vec<Content> = docs
760        .into_iter()
761        .map(|doc| Content::Resource {
762            resource: ResourceContent {
763                uri: doc.filepath,
764                name: doc.display_path,
765                title: Some(doc.title),
766                mime_type: "text/markdown".to_string(),
767                text: doc.body.unwrap_or_default(),
768            },
769        })
770        .collect();
771
772    Ok(ToolResult {
773        content: contents,
774        structured_content: None,
775        is_error: None,
776    })
777}
778
779pub async fn handle_status(db: &Database) -> Result<ToolResult> {
780    let collections = db.list_collections()?;
781    let total_docs: usize = collections.iter().map(|c| c.document_count).sum();
782    let needs_embedding = db.count_hashes_needing_embedding()?;
783    let has_vector = db.has_vector_index();
784
785    let mut provider_stats: std::collections::HashMap<String, (usize, usize)> =
786        std::collections::HashMap::new();
787    for coll in &collections {
788        let entry = provider_stats
789            .entry(coll.provider_type.clone())
790            .or_insert((0, 0));
791        entry.0 += 1;
792        entry.1 += coll.document_count;
793    }
794
795    let mut provider_summary = String::new();
796    for (provider, (coll_count, doc_count)) in &provider_stats {
797        provider_summary.push_str(&format!(
798            "\n  - {}: {} collections, {} documents",
799            provider, coll_count, doc_count
800        ));
801    }
802
803    let summary = format!(
804        "Index: {} documents across {} collections\n\
805         Embeddings: {}\n\
806         Vector index: {}\n\
807         \n\
808         Providers:{}",
809        total_docs,
810        collections.len(),
811        if needs_embedding > 0 {
812            format!("{} documents need embedding", needs_embedding)
813        } else {
814            "Up to date".to_string()
815        },
816        if has_vector {
817            "Available"
818        } else {
819            "Not created"
820        },
821        provider_summary
822    );
823
824    let provider_stats_json: Vec<_> = provider_stats
825        .iter()
826        .map(|(provider, (coll_count, doc_count))| {
827            serde_json::json!({
828                "provider": provider,
829                "collections": coll_count,
830                "documents": doc_count
831            })
832        })
833        .collect();
834
835    let structured = serde_json::json!({
836        "totalDocuments": total_docs,
837        "needsEmbedding": needs_embedding,
838        "hasVectorIndex": has_vector,
839        "providers": provider_stats_json,
840        "collections": collections.iter().map(|c| serde_json::json!({
841            "name": c.name,
842            "path": c.path,
843            "pattern": c.pattern,
844            "provider": c.provider_type,
845            "documents": c.document_count
846        })).collect::<Vec<_>>()
847    });
848
849    Ok(ToolResult {
850        content: vec![Content::Text { text: summary }],
851        structured_content: Some(structured),
852        is_error: None,
853    })
854}
855
856pub async fn handle_collection_add(db: &Database, args: Value) -> Result<ToolResult> {
857    let name = args
858        .get("name")
859        .and_then(|v| v.as_str())
860        .ok_or_else(|| anyhow::anyhow!("Missing collection name"))?;
861
862    let path = args
863        .get("path")
864        .and_then(|v| v.as_str())
865        .ok_or_else(|| anyhow::anyhow!("Missing path"))?;
866
867    let pattern = args
868        .get("pattern")
869        .and_then(|v| v.as_str())
870        .unwrap_or("**/*.md");
871
872    let provider = args
873        .get("provider")
874        .and_then(|v| v.as_str())
875        .unwrap_or("file");
876
877    let config = args.get("config").and_then(|v| v.as_str());
878
879    db.add_collection(name, path, pattern, provider, config)?;
880
881    let summary = format!(
882        "Added collection '{}' (provider: {}, path: {})",
883        name, provider, path
884    );
885
886    Ok(ToolResult {
887        content: vec![Content::Text { text: summary }],
888        structured_content: Some(serde_json::json!({
889            "name": name,
890            "path": path,
891            "pattern": pattern,
892            "provider": provider
893        })),
894        is_error: None,
895    })
896}
897
898pub async fn handle_collection_remove(db: &Database, args: Value) -> Result<ToolResult> {
899    let name = args
900        .get("name")
901        .and_then(|v| v.as_str())
902        .ok_or_else(|| anyhow::anyhow!("Missing collection name"))?;
903
904    let removed = db.remove_collection(name)?;
905
906    if removed {
907        Ok(ToolResult {
908            content: vec![Content::Text {
909                text: format!("Removed collection '{}'", name),
910            }],
911            structured_content: Some(serde_json::json!({
912                "name": name,
913                "removed": true
914            })),
915            is_error: None,
916        })
917    } else {
918        Ok(ToolResult {
919            content: vec![Content::Text {
920                text: format!("Collection '{}' not found", name),
921            }],
922            structured_content: Some(serde_json::json!({
923                "name": name,
924                "removed": false
925            })),
926            is_error: Some(true),
927        })
928    }
929}
930
931pub async fn handle_collection_update(db: &Database, args: Value) -> Result<ToolResult> {
932    let name = args
933        .get("name")
934        .and_then(|v| v.as_str())
935        .ok_or_else(|| anyhow::anyhow!("Missing collection name"))?;
936
937    let updated = db.reindex_collection(name).await?;
938
939    let summary = format!("Updated collection '{}': {} files changed", name, updated);
940
941    Ok(ToolResult {
942        content: vec![Content::Text { text: summary }],
943        structured_content: Some(serde_json::json!({
944            "name": name,
945            "filesUpdated": updated
946        })),
947        is_error: None,
948    })
949}
950
951pub fn metadata_add_tool_definition() -> ToolDefinition {
952    ToolDefinition {
953        name: "metadata_add".to_string(),
954        description: "Add custom user metadata to a document".to_string(),
955        input_schema: serde_json::json!({
956            "type": "object",
957            "properties": {
958                "docid": {
959                    "type": "string",
960                    "description": "Document ID (#abc123) or path"
961                },
962                "metadata": {
963                    "type": "object",
964                    "description": "Metadata fields as key-value pairs. Values can be strings, numbers, booleans, or arrays",
965                    "additionalProperties": true
966                }
967            },
968            "required": ["docid", "metadata"]
969        }),
970    }
971}
972
973pub fn metadata_get_tool_definition() -> ToolDefinition {
974    ToolDefinition {
975        name: "metadata_get".to_string(),
976        description: "Get custom user metadata from a document".to_string(),
977        input_schema: serde_json::json!({
978            "type": "object",
979            "properties": {
980                "docid": {
981                    "type": "string",
982                    "description": "Document ID (#abc123) or path"
983                }
984            },
985            "required": ["docid"]
986        }),
987    }
988}
989
990pub fn metadata_query_tool_definition() -> ToolDefinition {
991    ToolDefinition {
992        name: "metadata_query".to_string(),
993        description: "Query documents by custom user metadata".to_string(),
994        input_schema: serde_json::json!({
995            "type": "object",
996            "properties": {
997                "field": {
998                    "type": "string",
999                    "description": "Metadata field name to query"
1000                },
1001                "operator": {
1002                    "type": "string",
1003                    "enum": ["eq", "contains", "gt", "lt", "has", "exists"],
1004                    "description": "Comparison operator"
1005                },
1006                "value": {
1007                    "type": "string",
1008                    "description": "Value to compare against (not needed for 'exists' operator)"
1009                },
1010                "limit": {
1011                    "type": "integer",
1012                    "description": "Maximum results (default: 20)",
1013                    "default": 20
1014                }
1015            },
1016            "required": ["field", "operator"]
1017        }),
1018    }
1019}
1020
1021pub async fn handle_metadata_add(db: &Database, args: Value) -> Result<ToolResult> {
1022    use agentroot_core::MetadataBuilder;
1023
1024    let docid = args
1025        .get("docid")
1026        .and_then(|v| v.as_str())
1027        .ok_or_else(|| anyhow::anyhow!("Missing docid"))?;
1028
1029    let metadata_obj = args
1030        .get("metadata")
1031        .and_then(|v| v.as_object())
1032        .ok_or_else(|| anyhow::anyhow!("Missing or invalid metadata"))?;
1033
1034    let mut builder = MetadataBuilder::new();
1035
1036    for (key, value) in metadata_obj {
1037        match value {
1038            Value::String(s) => {
1039                builder = builder.text(key, s.clone());
1040            }
1041            Value::Number(n) => {
1042                if let Some(i) = n.as_i64() {
1043                    builder = builder.integer(key, i);
1044                } else if let Some(f) = n.as_f64() {
1045                    builder = builder.float(key, f);
1046                }
1047            }
1048            Value::Bool(b) => {
1049                builder = builder.boolean(key, *b);
1050            }
1051            Value::Array(arr) => {
1052                let tags: Vec<String> = arr
1053                    .iter()
1054                    .filter_map(|v| v.as_str())
1055                    .map(|s| s.to_string())
1056                    .collect();
1057                builder = builder.tags(key, tags);
1058            }
1059            _ => {}
1060        }
1061    }
1062
1063    let metadata = builder.build();
1064    db.add_metadata(docid, &metadata)?;
1065
1066    let summary = format!("Added metadata to document: {}", docid);
1067
1068    Ok(ToolResult {
1069        content: vec![Content::Text { text: summary }],
1070        structured_content: Some(serde_json::json!({
1071            "docid": docid,
1072            "added": true
1073        })),
1074        is_error: None,
1075    })
1076}
1077
1078pub async fn handle_metadata_get(db: &Database, args: Value) -> Result<ToolResult> {
1079    let docid = args
1080        .get("docid")
1081        .and_then(|v| v.as_str())
1082        .ok_or_else(|| anyhow::anyhow!("Missing docid"))?;
1083
1084    match db.get_metadata(docid)? {
1085        Some(metadata) => {
1086            let json = metadata.to_json()?;
1087            let parsed: serde_json::Value = serde_json::from_str(&json)?;
1088
1089            Ok(ToolResult {
1090                content: vec![Content::Text {
1091                    text: format!("User metadata for {}: {}", docid, json),
1092                }],
1093                structured_content: Some(serde_json::json!({
1094                    "docid": docid,
1095                    "metadata": parsed
1096                })),
1097                is_error: None,
1098            })
1099        }
1100        None => Ok(ToolResult {
1101            content: vec![Content::Text {
1102                text: format!("No user metadata found for document: {}", docid),
1103            }],
1104            structured_content: Some(serde_json::json!({
1105                "docid": docid,
1106                "metadata": null
1107            })),
1108            is_error: None,
1109        }),
1110    }
1111}
1112
1113pub async fn handle_metadata_query(db: &Database, args: Value) -> Result<ToolResult> {
1114    use agentroot_core::MetadataFilter;
1115
1116    let field = args
1117        .get("field")
1118        .and_then(|v| v.as_str())
1119        .ok_or_else(|| anyhow::anyhow!("Missing field"))?
1120        .to_string();
1121
1122    let operator = args
1123        .get("operator")
1124        .and_then(|v| v.as_str())
1125        .ok_or_else(|| anyhow::anyhow!("Missing operator"))?;
1126
1127    let limit = args.get("limit").and_then(|v| v.as_u64()).unwrap_or(20) as usize;
1128
1129    let filter = match operator {
1130        "exists" => MetadataFilter::Exists(field),
1131        _ => {
1132            let value = args
1133                .get("value")
1134                .and_then(|v| v.as_str())
1135                .ok_or_else(|| anyhow::anyhow!("Missing value for operator"))?;
1136
1137            match operator {
1138                "eq" => MetadataFilter::TextEq(field, value.to_string()),
1139                "contains" => MetadataFilter::TextContains(field, value.to_string()),
1140                "gt" => {
1141                    if let Ok(num) = value.parse::<i64>() {
1142                        MetadataFilter::IntegerGt(field, num)
1143                    } else if let Ok(num) = value.parse::<f64>() {
1144                        MetadataFilter::FloatGt(field, num)
1145                    } else {
1146                        return Err(anyhow::anyhow!("Invalid numeric value for gt"));
1147                    }
1148                }
1149                "lt" => {
1150                    if let Ok(num) = value.parse::<i64>() {
1151                        MetadataFilter::IntegerLt(field, num)
1152                    } else if let Ok(num) = value.parse::<f64>() {
1153                        MetadataFilter::FloatLt(field, num)
1154                    } else {
1155                        return Err(anyhow::anyhow!("Invalid numeric value for lt"));
1156                    }
1157                }
1158                "has" => MetadataFilter::TagsContain(field, value.to_string()),
1159                _ => return Err(anyhow::anyhow!("Invalid operator")),
1160            }
1161        }
1162    };
1163
1164    let docids = db.find_by_metadata(&filter, limit)?;
1165
1166    let summary = if docids.is_empty() {
1167        "No documents found matching filter".to_string()
1168    } else {
1169        format!("Found {} document(s) matching filter", docids.len())
1170    };
1171
1172    Ok(ToolResult {
1173        content: vec![Content::Text {
1174            text: format!("{}\n{}", summary, docids.join("\n")),
1175        }],
1176        structured_content: Some(serde_json::json!({
1177            "count": docids.len(),
1178            "documents": docids
1179        })),
1180        is_error: None,
1181    })
1182}