jpx_engine/
discovery.rs

//! Discovery Protocol implementation.
//!
//! This module implements a protocol for registering server capabilities and
//! searching them across servers. It uses BM25 search indexing for efficient
//! tool discovery.
//!
//! # Discovery Spec
//!
//! Servers register their tools using a structured discovery spec:
//!
//! ```json
//! {
//!   "server": {"name": "my-server", "version": "1.0.0"},
//!   "tools": [
//!     {"name": "my_tool", "description": "Does something useful", "tags": ["read"]}
//!   ]
//! }
//! ```
18
19use crate::bm25::{Bm25Index, IndexOptions};
20use serde::{Deserialize, Serialize};
21use serde_json::Value;
22use std::collections::HashMap;
23
/// English stop words excluded from search indexing.
///
/// These words appear in nearly every description, so they add no signal to
/// search relevance. The list is passed to the BM25 index as its stopword set
/// when the index is rebuilt.
const STOP_WORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
    "has", "he", "in", "is", "it", "its", "of", "on", "or", "that",
    "the", "to", "was", "were", "will", "with", "this", "but", "they", "have",
    "had", "what", "when", "where", "who", "which", "why", "how", "all", "each",
    "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor",
    "not", "only", "own", "same", "so", "than", "too", "very", "just", "can",
    "could", "should", "would", "may", "might", "must", "shall", "about", "above", "after",
    "again", "against", "below", "between", "into", "through", "during", "before", "under", "over",
];
35
36/// Preprocess text for search indexing.
37///
38/// This function cleans up text before indexing to improve search relevance:
39/// 1. Strips JMESPath literal syntax (backticks, escaped quotes)
40/// 2. Expands common regex patterns to natural language
41/// 3. Converts snake_case to separate words
42/// 4. Removes noise characters
43fn preprocess_for_search(text: &str) -> String {
44    let mut result = text.to_string();
45
46    // Strip JMESPath backtick literals: `"..."` -> ...
47    // This handles patterns like `"\n"` -> newline, `"\\d+"` -> digits
48    result = strip_jmespath_literals(&result);
49
50    // Expand common regex patterns to natural language
51    result = expand_regex_patterns(&result);
52
53    // Convert snake_case and camelCase to separate words
54    result = expand_identifiers(&result);
55
56    // Clean up extra whitespace
57    result.split_whitespace().collect::<Vec<_>>().join(" ")
58}
59
60/// Strip JMESPath backtick literal syntax from text.
61fn strip_jmespath_literals(text: &str) -> String {
62    let mut result = String::with_capacity(text.len());
63    let mut chars = text.chars().peekable();
64
65    while let Some(c) = chars.next() {
66        if c == '`' {
67            // Skip backtick and its contents, but extract meaningful parts
68            let mut inner = String::new();
69            for inner_c in chars.by_ref() {
70                if inner_c == '`' {
71                    break;
72                }
73                inner.push(inner_c);
74            }
75            // Extract content from JSON string if it looks like `"..."`
76            let trimmed = inner.trim();
77            if trimmed.starts_with('"') && trimmed.ends_with('"') {
78                let content = &trimmed[1..trimmed.len() - 1];
79                // Expand escape sequences to words
80                let expanded = expand_escape_sequences(content);
81                result.push(' ');
82                result.push_str(&expanded);
83                result.push(' ');
84            } else {
85                // Just include the inner content
86                result.push(' ');
87                result.push_str(trimmed);
88                result.push(' ');
89            }
90        } else {
91            result.push(c);
92        }
93    }
94
95    result
96}
97
/// Replace backslash escape sequences with descriptive words.
///
/// The replacements are applied one after another in the order listed, each
/// over the output of the previous one. A doubled backslash is handled last
/// and becomes a single space.
fn expand_escape_sequences(text: &str) -> String {
    // (escape sequence, replacement words) in application order.
    const EXPANSIONS: [(&str, &str); 8] = [
        ("\\n", " newline linebreak "),
        ("\\r", " return "),
        ("\\t", " tab "),
        ("\\s", " whitespace space "),
        ("\\d", " digit number numeric "),
        ("\\w", " word alphanumeric "),
        ("\\b", " boundary "),
        ("\\\\", " "),
    ];

    EXPANSIONS
        .iter()
        .fold(text.to_owned(), |acc, &(sequence, words)| {
            acc.replace(sequence, words)
        })
}
109
/// Rewrite common regex fragments as natural-language words.
///
/// Known fragments (character classes, `.*`, `\d+`, ...) are replaced by
/// descriptive phrases, in the order listed. Afterwards every remaining regex
/// metacharacter (`[ ] ( ) { } * + ? ^ $ |`) is replaced by a single space.
fn expand_regex_patterns(text: &str) -> String {
    // (regex fragment, replacement phrase) in application order.
    const PHRASES: [(&str, &str); 11] = [
        ("[0-9]", " digit number "),
        ("[a-z]", " letter lowercase "),
        ("[A-Z]", " letter uppercase "),
        ("[a-zA-Z]", " letter alphabetic "),
        ("[^>]", " "),
        (".*", " any anything "),
        (".+", " one more any "),
        ("\\d+", " digits numbers numeric "),
        ("\\w+", " words alphanumeric "),
        ("\\s+", " whitespace spaces "),
        ("\\S+", " nonwhitespace "),
    ];
    const METACHARACTERS: [char; 12] =
        ['[', ']', '(', ')', '{', '}', '*', '+', '?', '^', '$', '|'];

    let mut worded = text.to_owned();
    for &(fragment, phrase) in &PHRASES {
        worded = worded.replace(fragment, phrase);
    }

    // Blank out whatever metacharacters the phrase table did not consume.
    worded
        .chars()
        .map(|ch| if METACHARACTERS.contains(&ch) { ' ' } else { ch })
        .collect()
}
131
/// Add the component words of snake_case and camelCase identifiers.
///
/// The input is processed one whitespace-separated token at a time:
/// * A token containing `_` contributes each non-empty `_`-separated piece.
/// * Otherwise, a token with both upper- and lowercase letters is split before
///   each uppercase letter that does not follow another uppercase letter, and
///   the pieces are lowercased.
///
/// In every case the original token is emitted as well, so exact matches
/// still work. Each emitted word is followed by a single space, so non-empty
/// output ends with a trailing space.
fn expand_identifiers(text: &str) -> String {
    let mut out = String::with_capacity(text.len() * 2);

    for token in text.split_whitespace() {
        if token.contains('_') {
            // snake_case: emit the individual pieces.
            for piece in token.split('_').filter(|piece| !piece.is_empty()) {
                out.push_str(piece);
                out.push(' ');
            }
        } else if token.chars().any(char::is_uppercase) && token.chars().any(char::is_lowercase) {
            // camelCase: cut at each lower-to-upper transition. A run of
            // consecutive uppercase letters stays attached to what follows.
            let mut segment = String::new();
            let mut last_was_upper = false;

            for ch in token.chars() {
                let is_upper = ch.is_uppercase();
                if is_upper && !last_was_upper && !segment.is_empty() {
                    out.push_str(&segment.to_lowercase());
                    out.push(' ');
                    segment.clear();
                }
                segment.push(ch);
                last_was_upper = is_upper;
            }
            if !segment.is_empty() {
                out.push_str(&segment.to_lowercase());
                out.push(' ');
            }
        }

        // The token itself is always kept, whether or not it was expanded.
        out.push_str(token);
        out.push(' ');
    }

    out
}
178
179/// Discovery spec - the schema MCP servers use to register their tools
180#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct DiscoverySpec {
182    /// JSON Schema reference (optional)
183    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
184    pub schema: Option<String>,
185
186    /// Server metadata
187    pub server: ServerInfo,
188
189    /// List of tools provided by this server
190    pub tools: Vec<ToolSpec>,
191
192    /// Category definitions (optional)
193    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
194    pub categories: HashMap<String, CategoryInfo>,
195}
196
197/// Server metadata
198#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct ServerInfo {
200    /// Server name (required)
201    pub name: String,
202
203    /// Server version (optional)
204    #[serde(skip_serializing_if = "Option::is_none")]
205    pub version: Option<String>,
206
207    /// Server description (optional)
208    #[serde(skip_serializing_if = "Option::is_none")]
209    pub description: Option<String>,
210}
211
212/// Tool specification
213#[derive(Debug, Clone, Serialize, Deserialize)]
214pub struct ToolSpec {
215    /// Tool name (required)
216    pub name: String,
217
218    /// Alternative names/aliases
219    #[serde(default, skip_serializing_if = "Vec::is_empty")]
220    pub aliases: Vec<String>,
221
222    /// Primary category
223    #[serde(skip_serializing_if = "Option::is_none")]
224    pub category: Option<String>,
225
226    /// Subcategory within the primary category
227    #[serde(skip_serializing_if = "Option::is_none")]
228    pub subcategory: Option<String>,
229
230    /// Tags for filtering and search
231    #[serde(default, skip_serializing_if = "Vec::is_empty")]
232    pub tags: Vec<String>,
233
234    /// Short summary (for search results)
235    #[serde(skip_serializing_if = "Option::is_none")]
236    pub summary: Option<String>,
237
238    /// Full description
239    #[serde(skip_serializing_if = "Option::is_none")]
240    pub description: Option<String>,
241
242    /// Parameter definitions
243    #[serde(default, skip_serializing_if = "Vec::is_empty")]
244    pub params: Vec<ParamSpec>,
245
246    /// Return type information
247    #[serde(skip_serializing_if = "Option::is_none")]
248    pub returns: Option<ReturnSpec>,
249
250    /// Usage examples
251    #[serde(default, skip_serializing_if = "Vec::is_empty")]
252    pub examples: Vec<ExampleSpec>,
253
254    /// Related tools (author-declared relationships)
255    #[serde(default, skip_serializing_if = "Vec::is_empty")]
256    pub related: Vec<String>,
257
258    /// Version when tool was added
259    #[serde(skip_serializing_if = "Option::is_none")]
260    pub since: Option<String>,
261
262    /// Stability level (stable, beta, deprecated)
263    #[serde(skip_serializing_if = "Option::is_none")]
264    pub stability: Option<String>,
265}
266
267/// Parameter specification
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct ParamSpec {
270    /// Parameter name
271    pub name: String,
272
273    /// Parameter type (string, number, boolean, object, array)
274    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
275    pub param_type: Option<String>,
276
277    /// Whether parameter is required
278    #[serde(default)]
279    pub required: bool,
280
281    /// Parameter description
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub description: Option<String>,
284
285    /// Allowed values (for enums)
286    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
287    pub enum_values: Option<Vec<String>>,
288
289    /// Default value
290    #[serde(skip_serializing_if = "Option::is_none")]
291    pub default: Option<Value>,
292}
293
294/// Return type specification
295#[derive(Debug, Clone, Serialize, Deserialize)]
296pub struct ReturnSpec {
297    /// Return type
298    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
299    pub return_type: Option<String>,
300
301    /// Description of return value
302    #[serde(skip_serializing_if = "Option::is_none")]
303    pub description: Option<String>,
304}
305
306/// Example specification
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct ExampleSpec {
309    /// Example description
310    #[serde(skip_serializing_if = "Option::is_none")]
311    pub description: Option<String>,
312
313    /// Example arguments
314    #[serde(skip_serializing_if = "Option::is_none")]
315    pub args: Option<Value>,
316
317    /// Expected result (optional)
318    #[serde(skip_serializing_if = "Option::is_none")]
319    pub result: Option<Value>,
320}
321
322/// Category information
323#[derive(Debug, Clone, Serialize, Deserialize)]
324pub struct CategoryInfo {
325    /// Category description
326    #[serde(skip_serializing_if = "Option::is_none")]
327    pub description: Option<String>,
328
329    /// Subcategories
330    #[serde(default, skip_serializing_if = "Vec::is_empty")]
331    pub subcategories: Vec<String>,
332}
333
/// Discovery registry - holds registered specs and the search index built
/// from their tools.
///
/// All state is private; it is changed through `register` and `unregister`,
/// both of which rebuild the index.
#[derive(Debug)]
pub struct DiscoveryRegistry {
    /// Registered servers: name -> spec
    servers: HashMap<String, DiscoverySpec>,

    /// All tools flattened for indexing: tool_id -> (server_name, tool_spec).
    /// Tool IDs have the form `server_name:tool_name`.
    tools: HashMap<String, (String, ToolSpec)>,

    /// BM25 search index (rebuilt on registration changes). `None` while no
    /// tools are registered.
    index: Option<Bm25Index>,
}
346
347impl Default for DiscoveryRegistry {
348    fn default() -> Self {
349        Self::new()
350    }
351}
352
353impl DiscoveryRegistry {
354    /// Create a new empty registry
355    pub fn new() -> Self {
356        Self {
357            servers: HashMap::new(),
358            tools: HashMap::new(),
359            index: None,
360        }
361    }
362
363    /// Register a discovery spec
364    pub fn register(&mut self, spec: DiscoverySpec, replace: bool) -> RegistrationResult {
365        let server_name = spec.server.name.clone();
366
367        // Check if server already registered
368        if self.servers.contains_key(&server_name) && !replace {
369            return RegistrationResult {
370                ok: false,
371                tools_indexed: 0,
372                warnings: vec![format!(
373                    "Server '{}' already registered. Use replace=true to update.",
374                    server_name
375                )],
376            };
377        }
378
379        // Remove old tools from this server if replacing
380        if replace {
381            self.tools.retain(|_, (srv, _)| srv != &server_name);
382        }
383
384        // Add new tools
385        let mut warnings = Vec::new();
386        let mut tools_added = 0;
387
388        for tool in &spec.tools {
389            let tool_id = format!("{}:{}", server_name, tool.name);
390
391            if self.tools.contains_key(&tool_id) && !replace {
392                warnings.push(format!("Tool '{}' already exists, skipping", tool_id));
393                continue;
394            }
395
396            self.tools
397                .insert(tool_id, (server_name.clone(), tool.clone()));
398            tools_added += 1;
399        }
400
401        // Store the spec
402        self.servers.insert(server_name, spec);
403
404        // Rebuild the search index
405        self.rebuild_index();
406
407        RegistrationResult {
408            ok: true,
409            tools_indexed: tools_added,
410            warnings,
411        }
412    }
413
414    /// Unregister a server
415    pub fn unregister(&mut self, server_name: &str) -> bool {
416        if self.servers.remove(server_name).is_some() {
417            self.tools.retain(|_, (srv, _)| srv != server_name);
418            self.rebuild_index();
419            true
420        } else {
421            false
422        }
423    }
424
425    /// Rebuild the BM25 search index from all registered tools
426    fn rebuild_index(&mut self) {
427        if self.tools.is_empty() {
428            self.index = None;
429            return;
430        }
431
432        // Convert tools to indexable documents with preprocessed text
433        let docs: Vec<Value> = self
434            .tools
435            .iter()
436            .map(|(id, (server, tool))| {
437                let summary = tool.summary.as_deref().unwrap_or("");
438                let description = tool.description.as_deref().unwrap_or("");
439
440                // Preprocess text fields for better search
441                let expanded_summary = preprocess_for_search(summary);
442                let expanded_description = preprocess_for_search(description);
443
444                // Also preprocess examples for searchable content
445                let examples_text: String = tool
446                    .examples
447                    .iter()
448                    .filter_map(|ex| ex.description.as_ref())
449                    .map(|d| preprocess_for_search(d))
450                    .collect::<Vec<_>>()
451                    .join(" ");
452
453                serde_json::json!({
454                    "id": id,
455                    "server": server,
456                    "name": tool.name,
457                    "aliases": tool.aliases.join(" "),
458                    "category": tool.category.as_deref().unwrap_or(""),
459                    "tags": tool.tags.join(" "),
460                    "summary": summary,
461                    "description": description,
462                    "params": tool.params.iter().map(|p| p.name.as_str()).collect::<Vec<_>>().join(" "),
463                    // Expanded fields for better search
464                    "expanded_summary": expanded_summary,
465                    "expanded_description": expanded_description,
466                    "expanded_examples": examples_text,
467                })
468            })
469            .collect();
470
471        let options = IndexOptions {
472            fields: vec![
473                "name".to_string(),
474                "aliases".to_string(),
475                "category".to_string(),
476                "tags".to_string(),
477                "summary".to_string(),
478                "description".to_string(),
479                "params".to_string(),
480                // Include expanded fields in search
481                "expanded_summary".to_string(),
482                "expanded_description".to_string(),
483                "expanded_examples".to_string(),
484            ],
485            id_field: Some("id".to_string()),
486            stopwords: STOP_WORDS.iter().map(|s| s.to_string()).collect(),
487            ..Default::default()
488        };
489
490        self.index = Some(Bm25Index::build(&docs, options));
491    }
492
493    /// Query tools across all registered servers
494    pub fn query(&self, query: &str, top_k: usize) -> Vec<ToolQueryResult> {
495        let Some(index) = &self.index else {
496            return Vec::new();
497        };
498
499        let results = index.search(query, top_k);
500
501        results
502            .into_iter()
503            .filter_map(|r| {
504                let (server, tool) = self.tools.get(&r.id)?;
505                Some(ToolQueryResult {
506                    id: r.id,
507                    server: server.clone(),
508                    tool: tool.clone(),
509                    score: r.score,
510                    matches: r.matches,
511                })
512            })
513            .collect()
514    }
515
516    /// Find tools similar to a given tool
517    pub fn similar(&self, tool_id: &str, top_k: usize) -> Vec<ToolQueryResult> {
518        let Some(index) = &self.index else {
519            return Vec::new();
520        };
521
522        let results = index.similar(tool_id, top_k);
523
524        results
525            .into_iter()
526            .filter_map(|r| {
527                let (server, tool) = self.tools.get(&r.id)?;
528                Some(ToolQueryResult {
529                    id: r.id,
530                    server: server.clone(),
531                    tool: tool.clone(),
532                    score: r.score,
533                    matches: r.matches,
534                })
535            })
536            .collect()
537    }
538
539    /// List all registered servers
540    pub fn list_servers(&self) -> Vec<ServerSummary> {
541        self.servers
542            .iter()
543            .map(|(name, spec)| ServerSummary {
544                name: name.clone(),
545                version: spec.server.version.clone(),
546                description: spec.server.description.clone(),
547                tool_count: spec.tools.len(),
548            })
549            .collect()
550    }
551
552    /// List all categories across all servers
553    pub fn list_categories(&self) -> HashMap<String, CategorySummary> {
554        let mut categories: HashMap<String, CategorySummary> = HashMap::new();
555
556        for (server, tool) in self.tools.values() {
557            if let Some(cat) = &tool.category {
558                let entry = categories.entry(cat.clone()).or_insert(CategorySummary {
559                    name: cat.clone(),
560                    tool_count: 0,
561                    servers: Vec::new(),
562                    subcategories: Vec::new(),
563                });
564                entry.tool_count += 1;
565                if !entry.servers.contains(server) {
566                    entry.servers.push(server.clone());
567                }
568                if let Some(subcat) = tool
569                    .subcategory
570                    .as_ref()
571                    .filter(|s| !entry.subcategories.contains(s))
572                {
573                    entry.subcategories.push(subcat.clone());
574                }
575            }
576        }
577
578        categories
579    }
580
581    /// Get index statistics
582    pub fn index_stats(&self) -> Option<IndexStats> {
583        let index = self.index.as_ref()?;
584
585        Some(IndexStats {
586            doc_count: index.doc_count,
587            term_count: index.terms.len(),
588            avg_doc_length: index.avg_doc_length,
589            server_count: self.servers.len(),
590            top_terms: index.terms().into_iter().take(20).collect(),
591        })
592    }
593
594    /// Get the discovery schema as JSON
595    pub fn get_schema() -> Value {
596        serde_json::json!({
597            "$schema": "http://json-schema.org/draft-07/schema#",
598            "$id": "https://jpx.dev/schemas/mcp-discovery/v1.json",
599            "title": "MCP Discovery Spec",
600            "description": "Schema for registering MCP server capabilities with jpx",
601            "type": "object",
602            "required": ["server", "tools"],
603            "properties": {
604                "$schema": {
605                    "type": "string",
606                    "description": "JSON Schema reference"
607                },
608                "server": {
609                    "type": "object",
610                    "required": ["name"],
611                    "properties": {
612                        "name": {"type": "string", "description": "Server name"},
613                        "version": {"type": "string", "description": "Server version"},
614                        "description": {"type": "string", "description": "Server description"}
615                    }
616                },
617                "tools": {
618                    "type": "array",
619                    "items": {
620                        "type": "object",
621                        "required": ["name"],
622                        "properties": {
623                            "name": {"type": "string", "description": "Tool name"},
624                            "aliases": {"type": "array", "items": {"type": "string"}},
625                            "category": {"type": "string"},
626                            "subcategory": {"type": "string"},
627                            "tags": {"type": "array", "items": {"type": "string"}},
628                            "summary": {"type": "string", "description": "Short summary"},
629                            "description": {"type": "string", "description": "Full description"},
630                            "params": {
631                                "type": "array",
632                                "items": {
633                                    "type": "object",
634                                    "required": ["name"],
635                                    "properties": {
636                                        "name": {"type": "string"},
637                                        "type": {"type": "string"},
638                                        "required": {"type": "boolean"},
639                                        "description": {"type": "string"},
640                                        "enum": {"type": "array", "items": {"type": "string"}},
641                                        "default": {}
642                                    }
643                                }
644                            },
645                            "returns": {
646                                "type": "object",
647                                "properties": {
648                                    "type": {"type": "string"},
649                                    "description": {"type": "string"}
650                                }
651                            },
652                            "examples": {
653                                "type": "array",
654                                "items": {
655                                    "type": "object",
656                                    "properties": {
657                                        "description": {"type": "string"},
658                                        "args": {},
659                                        "result": {}
660                                    }
661                                }
662                            },
663                            "related": {"type": "array", "items": {"type": "string"}},
664                            "since": {"type": "string"},
665                            "stability": {"type": "string", "enum": ["stable", "beta", "deprecated"]}
666                        }
667                    }
668                },
669                "categories": {
670                    "type": "object",
671                    "additionalProperties": {
672                        "type": "object",
673                        "properties": {
674                            "description": {"type": "string"},
675                            "subcategories": {"type": "array", "items": {"type": "string"}}
676                        }
677                    }
678                }
679            }
680        })
681    }
682}
683
684/// Result of registering a discovery spec
685#[derive(Debug, Clone, Serialize, Deserialize)]
686pub struct RegistrationResult {
687    pub ok: bool,
688    pub tools_indexed: usize,
689    pub warnings: Vec<String>,
690}
691
692/// Tool query result
693#[derive(Debug, Clone, Serialize, Deserialize)]
694pub struct ToolQueryResult {
695    pub id: String,
696    pub server: String,
697    pub tool: ToolSpec,
698    pub score: f64,
699    pub matches: HashMap<String, Vec<String>>,
700}
701
702/// Server summary for listing
703#[derive(Debug, Clone, Serialize, Deserialize)]
704pub struct ServerSummary {
705    pub name: String,
706    pub version: Option<String>,
707    pub description: Option<String>,
708    pub tool_count: usize,
709}
710
711/// Category summary
712#[derive(Debug, Clone, Serialize, Deserialize)]
713pub struct CategorySummary {
714    pub name: String,
715    pub tool_count: usize,
716    pub servers: Vec<String>,
717    pub subcategories: Vec<String>,
718}
719
720/// Index statistics
721#[derive(Debug, Clone, Serialize, Deserialize)]
722pub struct IndexStats {
723    pub doc_count: usize,
724    pub term_count: usize,
725    pub avg_doc_length: f64,
726    pub server_count: usize,
727    pub top_terms: Vec<(String, usize)>,
728}
729
730#[cfg(test)]
731mod tests {
732    use super::*;
733
734    fn sample_spec() -> DiscoverySpec {
735        serde_json::from_value(serde_json::json!({
736            "server": {
737                "name": "redisctl",
738                "version": "0.5.0",
739                "description": "Redis Enterprise management"
740            },
741            "tools": [
742                {
743                    "name": "create_cluster",
744                    "category": "clusters",
745                    "tags": ["write", "provisioning"],
746                    "summary": "Create a new Redis cluster",
747                    "description": "Creates a new Redis Enterprise cluster with specified configuration"
748                },
749                {
750                    "name": "delete_cluster",
751                    "category": "clusters",
752                    "tags": ["write", "destructive"],
753                    "summary": "Delete a cluster",
754                    "description": "Permanently deletes a Redis cluster"
755                },
756                {
757                    "name": "list_backups",
758                    "category": "backups",
759                    "tags": ["read"],
760                    "summary": "List all backups",
761                    "description": "Lists all available backups for a cluster"
762                }
763            ]
764        })).unwrap()
765    }
766
767    #[test]
768    fn test_register_spec() {
769        let mut registry = DiscoveryRegistry::new();
770        let spec = sample_spec();
771
772        let result = registry.register(spec, false);
773
774        assert!(result.ok);
775        assert_eq!(result.tools_indexed, 3);
776        assert!(result.warnings.is_empty());
777    }
778
779    #[test]
780    fn test_query_tools() {
781        let mut registry = DiscoveryRegistry::new();
782        registry.register(sample_spec(), false);
783
784        let results = registry.query("cluster", 10);
785
786        // All tools mention cluster in their descriptions, but cluster tools rank higher
787        assert!(!results.is_empty());
788        // Top results should be the cluster tools (they have "cluster" in name)
789        let top_names: Vec<_> = results
790            .iter()
791            .take(2)
792            .map(|r| r.tool.name.as_str())
793            .collect();
794        assert!(top_names.contains(&"create_cluster"));
795        assert!(top_names.contains(&"delete_cluster"));
796    }
797
798    #[test]
799    fn test_query_by_tag() {
800        let mut registry = DiscoveryRegistry::new();
801        registry.register(sample_spec(), false);
802
803        let results = registry.query("read", 10);
804
805        assert_eq!(results.len(), 1);
806        assert_eq!(results[0].tool.name, "list_backups");
807    }
808
809    #[test]
810    fn test_list_servers() {
811        let mut registry = DiscoveryRegistry::new();
812        registry.register(sample_spec(), false);
813
814        let servers = registry.list_servers();
815
816        assert_eq!(servers.len(), 1);
817        assert_eq!(servers[0].name, "redisctl");
818        assert_eq!(servers[0].tool_count, 3);
819    }
820
821    #[test]
822    fn test_list_categories() {
823        let mut registry = DiscoveryRegistry::new();
824        registry.register(sample_spec(), false);
825
826        let categories = registry.list_categories();
827
828        assert_eq!(categories.len(), 2);
829        assert!(categories.contains_key("clusters"));
830        assert!(categories.contains_key("backups"));
831        assert_eq!(categories.get("clusters").unwrap().tool_count, 2);
832    }
833
834    #[test]
835    fn test_unregister() {
836        let mut registry = DiscoveryRegistry::new();
837        registry.register(sample_spec(), false);
838
839        assert!(registry.unregister("redisctl"));
840        assert!(registry.list_servers().is_empty());
841        assert!(registry.query("cluster", 10).is_empty());
842    }
843
844    #[test]
845    fn test_replace_registration() {
846        let mut registry = DiscoveryRegistry::new();
847        registry.register(sample_spec(), false);
848
849        // Try to register again without replace - should fail
850        let result = registry.register(sample_spec(), false);
851        assert!(!result.ok);
852
853        // With replace - should succeed
854        let result = registry.register(sample_spec(), true);
855        assert!(result.ok);
856    }
857
858    #[test]
859    fn test_similar_tools() {
860        let mut registry = DiscoveryRegistry::new();
861        registry.register(sample_spec(), false);
862
863        let similar = registry.similar("redisctl:create_cluster", 10);
864
865        // delete_cluster should be similar (shares "cluster" terms)
866        assert!(!similar.is_empty());
867        assert_eq!(similar[0].tool.name, "delete_cluster");
868    }
869
870    #[test]
871    fn test_minimal_spec() {
872        let minimal: DiscoverySpec = serde_json::from_value(serde_json::json!({
873            "server": {"name": "minimal"},
874            "tools": [{"name": "foo"}]
875        }))
876        .unwrap();
877
878        let mut registry = DiscoveryRegistry::new();
879        let result = registry.register(minimal, false);
880
881        assert!(result.ok);
882        assert_eq!(result.tools_indexed, 1);
883    }
884
885    #[test]
886    fn test_get_schema() {
887        let schema = DiscoveryRegistry::get_schema();
888
889        assert!(schema.get("$schema").is_some());
890        assert!(schema.get("properties").is_some());
891    }
892
893    #[test]
894    fn test_index_stats() {
895        let mut registry = DiscoveryRegistry::new();
896        registry.register(sample_spec(), false);
897
898        let stats = registry.index_stats().unwrap();
899
900        assert_eq!(stats.doc_count, 3);
901        assert_eq!(stats.server_count, 1);
902        assert!(stats.term_count > 0);
903    }
904
905    // Preprocessing tests
906
907    #[test]
908    fn test_strip_jmespath_literals() {
909        // Basic backtick literal with JSON string
910        assert!(strip_jmespath_literals(r#"split text on `"\n"` newlines"#).contains("newline"));
911
912        // Backtick with escaped regex
913        let result = strip_jmespath_literals(r#"match `"\\d+"` digits"#);
914        assert!(result.contains("digit"));
915
916        // Multiple backticks
917        let result = strip_jmespath_literals(r#"use `"\t"` for tabs and `"\n"` for lines"#);
918        assert!(result.contains("tab"));
919        assert!(result.contains("newline"));
920
921        // Non-string backtick content preserved
922        let result = strip_jmespath_literals(r#"literal `123` number"#);
923        assert!(result.contains("123"));
924    }
925
926    #[test]
927    fn test_expand_escape_sequences() {
928        assert!(expand_escape_sequences(r"\n").contains("newline"));
929        assert!(expand_escape_sequences(r"\t").contains("tab"));
930        assert!(expand_escape_sequences(r"\d").contains("digit"));
931        assert!(expand_escape_sequences(r"\w").contains("word"));
932        assert!(expand_escape_sequences(r"\s").contains("whitespace"));
933    }
934
935    #[test]
936    fn test_expand_regex_patterns() {
937        assert!(expand_regex_patterns(r"\d+").contains("digits"));
938        assert!(expand_regex_patterns(r"\w+").contains("words"));
939        assert!(expand_regex_patterns(r"[0-9]").contains("digit"));
940        assert!(expand_regex_patterns(r"[a-zA-Z]").contains("letter"));
941        assert!(expand_regex_patterns(r".*").contains("any"));
942
943        // Metacharacters should be stripped
944        let result = expand_regex_patterns(r"foo[bar]+baz");
945        assert!(!result.contains('['));
946        assert!(!result.contains(']'));
947        assert!(!result.contains('+'));
948    }
949
950    #[test]
951    fn test_expand_identifiers() {
952        // snake_case should expand
953        let result = expand_identifiers("get_user_info");
954        assert!(result.contains("get"));
955        assert!(result.contains("user"));
956        assert!(result.contains("info"));
957        // Original preserved for exact match
958        assert!(result.contains("get_user_info"));
959
960        // camelCase should expand
961        let result = expand_identifiers("getUserInfo");
962        assert!(result.contains("get"));
963        assert!(result.contains("user"));
964        assert!(result.contains("info"));
965        // Original preserved
966        assert!(result.contains("getUserInfo"));
967
968        // Simple words unchanged
969        let result = expand_identifiers("simple");
970        assert!(result.contains("simple"));
971    }
972
973    #[test]
974    fn test_preprocess_for_search_integration() {
975        // Full preprocessing pipeline
976        let input = r#"Split on `"\n"` to get lines, use regex_extract for \d+ numbers"#;
977        let result = preprocess_for_search(input);
978
979        // Should contain expanded terms
980        assert!(result.contains("newline") || result.contains("linebreak"));
981        assert!(result.contains("digit") || result.contains("number"));
982        assert!(result.contains("regex"));
983        assert!(result.contains("extract"));
984
985        // Should not have excess whitespace
986        assert!(!result.contains("  "));
987    }
988
989    #[test]
990    fn test_preprocess_preserves_search_terms() {
991        // Make sure useful search terms aren't lost
992        let input = "Create a new database connection";
993        let result = preprocess_for_search(input);
994
995        assert!(result.contains("Create"));
996        assert!(result.contains("database"));
997        assert!(result.contains("connection"));
998    }
999
1000    #[test]
1001    fn test_search_with_preprocessed_content() {
1002        // Test that preprocessing improves search for escape-heavy descriptions
1003        let spec: DiscoverySpec = serde_json::from_value(serde_json::json!({
1004            "server": {"name": "text-tools"},
1005            "tools": [
1006                {
1007                    "name": "split_lines",
1008                    "summary": r#"Split text on newlines using `"\n"` delimiter"#,
1009                    "description": r#"Splits input string on newline characters. Use split(@, `"\n"`) syntax."#
1010                },
1011                {
1012                    "name": "extract_numbers",
1013                    "summary": r#"Extract numeric patterns with regex `"\\d+"`"#,
1014                    "description": r#"Uses regex_extract to find all \d+ digit sequences in text."#
1015                }
1016            ]
1017        }))
1018        .unwrap();
1019
1020        let mut registry = DiscoveryRegistry::new();
1021        registry.register(spec, false);
1022
1023        // Search for "newline" should find split_lines due to preprocessing
1024        let results = registry.query("newline", 10);
1025        assert!(!results.is_empty());
1026        assert_eq!(results[0].tool.name, "split_lines");
1027
1028        // Search for "digit" should find extract_numbers
1029        let results = registry.query("digit", 10);
1030        assert!(!results.is_empty());
1031        assert_eq!(results[0].tool.name, "extract_numbers");
1032    }
1033}