Skip to main content

jpx_engine/
discovery.rs

1//! Discovery Protocol implementation.
2//!
3//! This module implements a protocol for capability registration and search
4//! across servers. It uses BM25 search indexing for efficient tool discovery.
5//!
6//! # Discovery Spec
7//!
8//! Servers can register their tools using a structured discovery spec:
9//!
10//! ```json
11//! {
12//!   "server": {"name": "my-server", "version": "1.0.0"},
13//!   "tools": [
14//!     {"name": "my_tool", "description": "Does something useful", "tags": ["read"]}
15//!   ]
16//! }
17//! ```
18
19use crate::bm25::{Bm25Index, IndexOptions};
20use serde::{Deserialize, Serialize};
21use serde_json::Value;
22use std::collections::HashMap;
23
24#[cfg(feature = "schema")]
25use schemars::JsonSchema;
26
/// Common English stop words to filter from search indexing.
///
/// These words are too common to be useful for search relevance. The list is
/// converted to owned strings and passed as the `stopwords` option each time
/// the registry rebuilds its BM25 index.
const STOP_WORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is", "it",
    "its", "of", "on", "or", "that", "the", "to", "was", "were", "will", "with", "this", "but",
    "they", "have", "had", "what", "when", "where", "who", "which", "why", "how", "all", "each",
    "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
    "own", "same", "so", "than", "too", "very", "just", "can", "could", "should", "would", "may",
    "might", "must", "shall", "about", "above", "after", "again", "against", "below", "between",
    "into", "through", "during", "before", "under", "over",
];
38
39/// Preprocess text for search indexing.
40///
41/// This function cleans up text before indexing to improve search relevance:
42/// 1. Strips JMESPath literal syntax (backticks, escaped quotes)
43/// 2. Expands common regex patterns to natural language
44/// 3. Converts snake_case to separate words
45/// 4. Removes noise characters
46fn preprocess_for_search(text: &str) -> String {
47    let mut result = text.to_string();
48
49    // Strip JMESPath backtick literals: `"..."` -> ...
50    // This handles patterns like `"\n"` -> newline, `"\\d+"` -> digits
51    result = strip_jmespath_literals(&result);
52
53    // Expand common regex patterns to natural language
54    result = expand_regex_patterns(&result);
55
56    // Convert snake_case and camelCase to separate words
57    result = expand_identifiers(&result);
58
59    // Clean up extra whitespace
60    result.split_whitespace().collect::<Vec<_>>().join(" ")
61}
62
63/// Strip JMESPath backtick literal syntax from text.
64fn strip_jmespath_literals(text: &str) -> String {
65    let mut result = String::with_capacity(text.len());
66    let mut chars = text.chars().peekable();
67
68    while let Some(c) = chars.next() {
69        if c == '`' {
70            // Skip backtick and its contents, but extract meaningful parts
71            let mut inner = String::new();
72            for inner_c in chars.by_ref() {
73                if inner_c == '`' {
74                    break;
75                }
76                inner.push(inner_c);
77            }
78            // Extract content from JSON string if it looks like `"..."`
79            let trimmed = inner.trim();
80            if trimmed.starts_with('"') && trimmed.ends_with('"') {
81                let content = &trimmed[1..trimmed.len() - 1];
82                // Expand escape sequences to words
83                let expanded = expand_escape_sequences(content);
84                result.push(' ');
85                result.push_str(&expanded);
86                result.push(' ');
87            } else {
88                // Just include the inner content
89                result.push(' ');
90                result.push_str(trimmed);
91                result.push(' ');
92            }
93        } else {
94            result.push(c);
95        }
96    }
97
98    result
99}
100
/// Replace backslash escape sequences with descriptive English words.
///
/// The substitutions are applied one after another in the order listed
/// below, each over the output of the previous one. A doubled backslash is
/// handled last and becomes a single space.
fn expand_escape_sequences(text: &str) -> String {
    // (escape sequence, replacement words) - order is significant.
    const ESCAPE_WORDS: [(&str, &str); 8] = [
        ("\\n", " newline linebreak "),
        ("\\r", " return "),
        ("\\t", " tab "),
        ("\\s", " whitespace space "),
        ("\\d", " digit number numeric "),
        ("\\w", " word alphanumeric "),
        ("\\b", " boundary "),
        ("\\\\", " "),
    ];

    ESCAPE_WORDS
        .iter()
        .fold(text.to_string(), |acc, &(escape, words)| {
            acc.replace(escape, words)
        })
}
112
/// Translate common regex fragments into plain English words.
///
/// Known character classes and quantified shorthands are substituted first,
/// one after another in the order listed below. Afterwards every remaining
/// regex metacharacter (`[ ] ( ) { } * + ? ^ $ |`) is replaced by a space.
fn expand_regex_patterns(text: &str) -> String {
    // (regex fragment, replacement words) - applied sequentially, in order.
    const FRAGMENT_WORDS: [(&str, &str); 11] = [
        ("[0-9]", " digit number "),
        ("[a-z]", " letter lowercase "),
        ("[A-Z]", " letter uppercase "),
        ("[a-zA-Z]", " letter alphabetic "),
        ("[^>]", " "),
        (".*", " any anything "),
        (".+", " one more any "),
        ("\\d+", " digits numbers numeric "),
        ("\\w+", " words alphanumeric "),
        ("\\s+", " whitespace spaces "),
        ("\\S+", " nonwhitespace "),
    ];
    // Leftover metacharacters that carry no search value.
    const METACHARACTERS: [char; 12] = [
        '[', ']', '(', ')', '{', '}', '*', '+', '?', '^', '$', '|',
    ];

    let mut expanded = text.to_string();
    for &(fragment, words) in FRAGMENT_WORDS.iter() {
        expanded = expanded.replace(fragment, words);
    }
    expanded.replace(METACHARACTERS, " ")
}
134
/// Add the component words of snake_case and camelCase identifiers.
///
/// The input is processed token by token (split on whitespace). Every token
/// is written to the output followed by a single space; identifier-like
/// tokens are additionally preceded by their parts:
///
/// * A token containing `_` is preceded by its non-empty `_`-separated parts,
///   unchanged in case.
/// * Otherwise, a token containing both upper- and lowercase letters is
///   preceded by its lowercased segments, where a new segment starts at each
///   uppercase letter that does not directly follow another uppercase letter.
///
/// The returned string therefore ends with a trailing space unless the input
/// had no tokens.
fn expand_identifiers(text: &str) -> String {
    let mut out = String::with_capacity(text.len() * 2);

    for token in text.split_whitespace() {
        if token.contains('_') {
            for piece in token.split('_').filter(|piece| !piece.is_empty()) {
                out.push_str(piece);
                out.push(' ');
            }
        } else if token.chars().any(char::is_uppercase) && token.chars().any(char::is_lowercase) {
            // Walk the token once, cutting a segment whenever a run of
            // uppercase letters begins after at least one earlier character.
            let mut segment_start = 0;
            let mut after_upper = false;
            for (idx, ch) in token.char_indices() {
                let upper = ch.is_uppercase();
                if upper && !after_upper && idx > segment_start {
                    out.push_str(&token[segment_start..idx].to_lowercase());
                    out.push(' ');
                    segment_start = idx;
                }
                after_upper = upper;
            }
            out.push_str(&token[segment_start..].to_lowercase());
            out.push(' ');
        }

        // The original token is always kept so exact matches still work.
        out.push_str(token);
        out.push(' ');
    }

    out
}
181
/// Discovery spec - the schema MCP servers use to register their tools.
///
/// This is the top-level document a server submits for registration. When
/// deserializing, `server` and `tools` must be present; `$schema` and
/// `categories` may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct DiscoverySpec {
    /// JSON Schema reference (optional). Serialized under the key `$schema`
    /// and left out of the output when `None`.
    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
    pub schema: Option<String>,

    /// Server metadata
    pub server: ServerInfo,

    /// List of tools provided by this server
    pub tools: Vec<ToolSpec>,

    /// Category definitions (optional), keyed by category name. Defaults to
    /// an empty map when absent and is not serialized when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub categories: HashMap<String, CategoryInfo>,
}
200
/// Server metadata.
///
/// Identifies the server that owns a set of tools. Optional fields are
/// omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ServerInfo {
    /// Server name (required). The registry uses it as the key for the
    /// server and as the prefix of every tool id.
    pub name: String,

    /// Server version (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Server description (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
216
/// Tool specification.
///
/// Only `name` is required. List fields default to empty when absent and
/// are skipped during serialization when empty; optional fields are skipped
/// when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ToolSpec {
    /// Tool name (required). Combined with the server name to form the
    /// tool id used by the registry.
    pub name: String,

    /// Alternative names/aliases
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,

    /// Primary category
    #[serde(skip_serializing_if = "Option::is_none")]
    pub category: Option<String>,

    /// Subcategory within the primary category
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subcategory: Option<String>,

    /// Tags for filtering and search
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Short summary (for search results)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,

    /// Full description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Parameter definitions
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub params: Vec<ParamSpec>,

    /// Return type information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub returns: Option<ReturnSpec>,

    /// Usage examples
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub examples: Vec<ExampleSpec>,

    /// Related tools (author-declared relationships)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub related: Vec<String>,

    /// Version when tool was added
    #[serde(skip_serializing_if = "Option::is_none")]
    pub since: Option<String>,

    /// Stability level (stable, beta, deprecated). Stored as a free-form
    /// string; this type does not restrict it to those values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stability: Option<String>,
}
272
/// Parameter specification.
///
/// Describes one parameter of a tool. Only `name` is required.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ParamSpec {
    /// Parameter name
    pub name: String,

    /// Parameter type (string, number, boolean, object, array).
    /// Serialized under the key `type`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub param_type: Option<String>,

    /// Whether parameter is required. Defaults to `false` when absent.
    #[serde(default)]
    pub required: bool,

    /// Parameter description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Allowed values (for enums). Serialized under the key `enum`.
    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
    pub enum_values: Option<Vec<String>>,

    /// Default value, kept as an arbitrary JSON value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
}
300
/// Return type specification.
///
/// Both fields are optional and omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ReturnSpec {
    /// Return type. Serialized under the key `type`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub return_type: Option<String>,

    /// Description of return value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
313
/// Example specification.
///
/// A usage example attached to a tool. All fields are optional and omitted
/// from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ExampleSpec {
    /// Example description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Example arguments, kept as an arbitrary JSON value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub args: Option<Value>,

    /// Expected result (optional), kept as an arbitrary JSON value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result: Option<Value>,
}
330
/// Category information.
///
/// Value type of the `categories` map in a discovery spec.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct CategoryInfo {
    /// Category description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Subcategories. Defaults to empty when absent and is not serialized
    /// when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub subcategories: Vec<String>,
}
343
/// Discovery registry - holds registered specs and search index.
///
/// The registry keeps the full spec of every registered server, a flattened
/// table of all their tools, and a BM25 index built from that table.
#[derive(Debug)]
pub struct DiscoveryRegistry {
    /// Registered servers: name -> spec
    servers: HashMap<String, DiscoverySpec>,

    /// All tools flattened for indexing: tool_id -> (server_name, tool_spec).
    /// A tool id has the form `"<server name>:<tool name>"`.
    tools: HashMap<String, (String, ToolSpec)>,

    /// BM25 search index (rebuilt on registration changes).
    /// `None` while no tools are registered.
    index: Option<Bm25Index>,
}
356
/// The default registry is the empty registry produced by
/// `DiscoveryRegistry::new`.
impl Default for DiscoveryRegistry {
    /// Create an empty registry by delegating to `new`.
    fn default() -> Self {
        Self::new()
    }
}
362
363impl DiscoveryRegistry {
364    /// Create a new empty registry
365    pub fn new() -> Self {
366        Self {
367            servers: HashMap::new(),
368            tools: HashMap::new(),
369            index: None,
370        }
371    }
372
373    /// Register a discovery spec
374    pub fn register(&mut self, spec: DiscoverySpec, replace: bool) -> RegistrationResult {
375        let server_name = spec.server.name.clone();
376
377        // Check if server already registered
378        if self.servers.contains_key(&server_name) && !replace {
379            return RegistrationResult {
380                ok: false,
381                tools_indexed: 0,
382                warnings: vec![format!(
383                    "Server '{}' already registered. Use replace=true to update.",
384                    server_name
385                )],
386            };
387        }
388
389        // Remove old tools from this server if replacing
390        if replace {
391            self.tools.retain(|_, (srv, _)| srv != &server_name);
392        }
393
394        // Add new tools
395        let mut warnings = Vec::new();
396        let mut tools_added = 0;
397
398        for tool in &spec.tools {
399            let tool_id = format!("{}:{}", server_name, tool.name);
400
401            if self.tools.contains_key(&tool_id) && !replace {
402                warnings.push(format!("Tool '{}' already exists, skipping", tool_id));
403                continue;
404            }
405
406            self.tools
407                .insert(tool_id, (server_name.clone(), tool.clone()));
408            tools_added += 1;
409        }
410
411        // Store the spec
412        self.servers.insert(server_name, spec);
413
414        // Rebuild the search index
415        self.rebuild_index();
416
417        RegistrationResult {
418            ok: true,
419            tools_indexed: tools_added,
420            warnings,
421        }
422    }
423
424    /// Unregister a server
425    pub fn unregister(&mut self, server_name: &str) -> bool {
426        if self.servers.remove(server_name).is_some() {
427            self.tools.retain(|_, (srv, _)| srv != server_name);
428            self.rebuild_index();
429            true
430        } else {
431            false
432        }
433    }
434
435    /// Rebuild the BM25 search index from all registered tools
436    fn rebuild_index(&mut self) {
437        if self.tools.is_empty() {
438            self.index = None;
439            return;
440        }
441
442        // Convert tools to indexable documents with preprocessed text
443        let docs: Vec<Value> = self
444            .tools
445            .iter()
446            .map(|(id, (server, tool))| {
447                let summary = tool.summary.as_deref().unwrap_or("");
448                let description = tool.description.as_deref().unwrap_or("");
449
450                // Preprocess text fields for better search
451                let expanded_summary = preprocess_for_search(summary);
452                let expanded_description = preprocess_for_search(description);
453
454                // Also preprocess examples for searchable content
455                let examples_text: String = tool
456                    .examples
457                    .iter()
458                    .filter_map(|ex| ex.description.as_ref())
459                    .map(|d| preprocess_for_search(d))
460                    .collect::<Vec<_>>()
461                    .join(" ");
462
463                serde_json::json!({
464                    "id": id,
465                    "server": server,
466                    "name": tool.name,
467                    "aliases": tool.aliases.join(" "),
468                    "category": tool.category.as_deref().unwrap_or(""),
469                    "tags": tool.tags.join(" "),
470                    "summary": summary,
471                    "description": description,
472                    "params": tool.params.iter().map(|p| p.name.as_str()).collect::<Vec<_>>().join(" "),
473                    // Expanded fields for better search
474                    "expanded_summary": expanded_summary,
475                    "expanded_description": expanded_description,
476                    "expanded_examples": examples_text,
477                })
478            })
479            .collect();
480
481        let options = IndexOptions {
482            fields: vec![
483                "name".to_string(),
484                "aliases".to_string(),
485                "category".to_string(),
486                "tags".to_string(),
487                "summary".to_string(),
488                "description".to_string(),
489                "params".to_string(),
490                // Include expanded fields in search
491                "expanded_summary".to_string(),
492                "expanded_description".to_string(),
493                "expanded_examples".to_string(),
494            ],
495            id_field: Some("id".to_string()),
496            stopwords: STOP_WORDS.iter().map(|s| s.to_string()).collect(),
497            ..Default::default()
498        };
499
500        self.index = Some(Bm25Index::build(&docs, options));
501    }
502
503    /// Query tools across all registered servers
504    pub fn query(&self, query: &str, top_k: usize) -> Vec<ToolQueryResult> {
505        let Some(index) = &self.index else {
506            return Vec::new();
507        };
508
509        let results = index.search(query, top_k);
510
511        results
512            .into_iter()
513            .filter_map(|r| {
514                let (server, tool) = self.tools.get(&r.id)?;
515                Some(ToolQueryResult {
516                    id: r.id,
517                    server: server.clone(),
518                    tool: tool.clone(),
519                    score: r.score,
520                    matches: r.matches,
521                })
522            })
523            .collect()
524    }
525
526    /// Find tools similar to a given tool
527    pub fn similar(&self, tool_id: &str, top_k: usize) -> Vec<ToolQueryResult> {
528        let Some(index) = &self.index else {
529            return Vec::new();
530        };
531
532        let results = index.similar(tool_id, top_k);
533
534        results
535            .into_iter()
536            .filter_map(|r| {
537                let (server, tool) = self.tools.get(&r.id)?;
538                Some(ToolQueryResult {
539                    id: r.id,
540                    server: server.clone(),
541                    tool: tool.clone(),
542                    score: r.score,
543                    matches: r.matches,
544                })
545            })
546            .collect()
547    }
548
549    /// List all registered servers
550    pub fn list_servers(&self) -> Vec<ServerSummary> {
551        self.servers
552            .iter()
553            .map(|(name, spec)| ServerSummary {
554                name: name.clone(),
555                version: spec.server.version.clone(),
556                description: spec.server.description.clone(),
557                tool_count: spec.tools.len(),
558            })
559            .collect()
560    }
561
562    /// List all categories across all servers
563    pub fn list_categories(&self) -> HashMap<String, CategorySummary> {
564        let mut categories: HashMap<String, CategorySummary> = HashMap::new();
565
566        for (server, tool) in self.tools.values() {
567            if let Some(cat) = &tool.category {
568                let entry = categories.entry(cat.clone()).or_insert(CategorySummary {
569                    name: cat.clone(),
570                    tool_count: 0,
571                    servers: Vec::new(),
572                    subcategories: Vec::new(),
573                });
574                entry.tool_count += 1;
575                if !entry.servers.contains(server) {
576                    entry.servers.push(server.clone());
577                }
578                if let Some(subcat) = tool
579                    .subcategory
580                    .as_ref()
581                    .filter(|s| !entry.subcategories.contains(s))
582                {
583                    entry.subcategories.push(subcat.clone());
584                }
585            }
586        }
587
588        categories
589    }
590
591    /// Get index statistics
592    pub fn index_stats(&self) -> Option<IndexStats> {
593        let index = self.index.as_ref()?;
594
595        Some(IndexStats {
596            doc_count: index.doc_count,
597            term_count: index.terms.len(),
598            avg_doc_length: index.avg_doc_length,
599            server_count: self.servers.len(),
600            top_terms: index.terms().into_iter().take(20).collect(),
601        })
602    }
603
    /// Get the discovery schema as JSON.
    ///
    /// Returns a JSON Schema (draft-07) document describing the registration
    /// payload: a required `server` object, a required `tools` array and an
    /// optional `categories` map. This is an associated function; no registry
    /// instance is needed.
    ///
    /// NOTE(review): the document is a hand-written literal, independent of
    /// the serde struct definitions in this file, so the two must be kept in
    /// sync manually. For example, `stability` is limited to
    /// `stable`/`beta`/`deprecated` here, while `ToolSpec::stability` accepts
    /// any string.
    pub fn get_schema() -> Value {
        serde_json::json!({
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://jpx.dev/schemas/mcp-discovery/v1.json",
            "title": "MCP Discovery Spec",
            "description": "Schema for registering MCP server capabilities with jpx",
            "type": "object",
            "required": ["server", "tools"],
            "properties": {
                "$schema": {
                    "type": "string",
                    "description": "JSON Schema reference"
                },
                "server": {
                    "type": "object",
                    "required": ["name"],
                    "properties": {
                        "name": {"type": "string", "description": "Server name"},
                        "version": {"type": "string", "description": "Server version"},
                        "description": {"type": "string", "description": "Server description"}
                    }
                },
                "tools": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["name"],
                        "properties": {
                            "name": {"type": "string", "description": "Tool name"},
                            "aliases": {"type": "array", "items": {"type": "string"}},
                            "category": {"type": "string"},
                            "subcategory": {"type": "string"},
                            "tags": {"type": "array", "items": {"type": "string"}},
                            "summary": {"type": "string", "description": "Short summary"},
                            "description": {"type": "string", "description": "Full description"},
                            "params": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "required": ["name"],
                                    "properties": {
                                        "name": {"type": "string"},
                                        "type": {"type": "string"},
                                        "required": {"type": "boolean"},
                                        "description": {"type": "string"},
                                        "enum": {"type": "array", "items": {"type": "string"}},
                                        "default": {}
                                    }
                                }
                            },
                            "returns": {
                                "type": "object",
                                "properties": {
                                    "type": {"type": "string"},
                                    "description": {"type": "string"}
                                }
                            },
                            "examples": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "description": {"type": "string"},
                                        "args": {},
                                        "result": {}
                                    }
                                }
                            },
                            "related": {"type": "array", "items": {"type": "string"}},
                            "since": {"type": "string"},
                            "stability": {"type": "string", "enum": ["stable", "beta", "deprecated"]}
                        }
                    }
                },
                "categories": {
                    "type": "object",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "description": {"type": "string"},
                            "subcategories": {"type": "array", "items": {"type": "string"}}
                        }
                    }
                }
            }
        })
    }
692}
693
/// Result of registering a discovery spec
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegistrationResult {
    /// `false` when the registration was rejected because the server was
    /// already registered and replacement was not requested; `true` otherwise.
    pub ok: bool,
    /// Number of tools added to the registry by this registration
    /// (0 when the registration was rejected).
    pub tools_indexed: usize,
    /// Human-readable messages about problems encountered, such as the
    /// rejection reason or tool ids that already existed.
    pub warnings: Vec<String>,
}
701
/// Tool query result
///
/// One hit returned by the registry's `query` or `similar` methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolQueryResult {
    /// Tool id, of the form `"<server name>:<tool name>"`.
    pub id: String,
    /// Name of the server that registered the tool.
    pub server: String,
    /// Copy of the registered tool spec.
    pub tool: ToolSpec,
    /// Score reported by the search index for this hit.
    pub score: f64,
    /// Match details copied verbatim from the search index result.
    /// NOTE(review): presumably field name -> matched terms; confirm against
    /// the `bm25` module.
    pub matches: HashMap<String, Vec<String>>,
}
711
/// Server summary for listing
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerSummary {
    /// Name the server was registered under.
    pub name: String,
    /// Server version, if the spec declared one.
    pub version: Option<String>,
    /// Server description, if the spec declared one.
    pub description: Option<String>,
    /// Number of tools declared in the server's spec.
    pub tool_count: usize,
}
720
/// Category summary
///
/// Aggregated view of one category, as produced by the registry's
/// `list_categories` method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorySummary {
    /// Category name.
    pub name: String,
    /// Number of registered tools that declare this category.
    pub tool_count: usize,
    /// Distinct names of the servers providing tools in this category.
    pub servers: Vec<String>,
    /// Distinct subcategories declared by tools in this category.
    pub subcategories: Vec<String>,
}
729
/// Index statistics
///
/// Snapshot produced by the registry's `index_stats` method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStats {
    /// Document count reported by the index.
    pub doc_count: usize,
    /// Number of entries in the index's term table.
    pub term_count: usize,
    /// Average document length reported by the index.
    pub avg_doc_length: f64,
    /// Number of servers registered at the time of the snapshot.
    pub server_count: usize,
    /// At most 20 `(term, count)` pairs taken from the front of the index's
    /// term listing.
    /// NOTE(review): ordering and the meaning of the count depend on
    /// `Bm25Index::terms`; confirm against the `bm25` module.
    pub top_terms: Vec<(String, usize)>,
}
739
740#[cfg(test)]
741mod tests {
742    use super::*;
743
744    fn sample_spec() -> DiscoverySpec {
745        serde_json::from_value(serde_json::json!({
746            "server": {
747                "name": "redisctl",
748                "version": "0.5.0",
749                "description": "Redis Enterprise management"
750            },
751            "tools": [
752                {
753                    "name": "create_cluster",
754                    "category": "clusters",
755                    "tags": ["write", "provisioning"],
756                    "summary": "Create a new Redis cluster",
757                    "description": "Creates a new Redis Enterprise cluster with specified configuration"
758                },
759                {
760                    "name": "delete_cluster",
761                    "category": "clusters",
762                    "tags": ["write", "destructive"],
763                    "summary": "Delete a cluster",
764                    "description": "Permanently deletes a Redis cluster"
765                },
766                {
767                    "name": "list_backups",
768                    "category": "backups",
769                    "tags": ["read"],
770                    "summary": "List all backups",
771                    "description": "Lists all available backups for a cluster"
772                }
773            ]
774        })).unwrap()
775    }
776
777    #[test]
778    fn test_register_spec() {
779        let mut registry = DiscoveryRegistry::new();
780        let spec = sample_spec();
781
782        let result = registry.register(spec, false);
783
784        assert!(result.ok);
785        assert_eq!(result.tools_indexed, 3);
786        assert!(result.warnings.is_empty());
787    }
788
789    #[test]
790    fn test_query_tools() {
791        let mut registry = DiscoveryRegistry::new();
792        registry.register(sample_spec(), false);
793
794        let results = registry.query("cluster", 10);
795
796        // All tools mention cluster in their descriptions, but cluster tools rank higher
797        assert!(!results.is_empty());
798        // Top results should be the cluster tools (they have "cluster" in name)
799        let top_names: Vec<_> = results
800            .iter()
801            .take(2)
802            .map(|r| r.tool.name.as_str())
803            .collect();
804        assert!(top_names.contains(&"create_cluster"));
805        assert!(top_names.contains(&"delete_cluster"));
806    }
807
808    #[test]
809    fn test_query_by_tag() {
810        let mut registry = DiscoveryRegistry::new();
811        registry.register(sample_spec(), false);
812
813        let results = registry.query("read", 10);
814
815        assert_eq!(results.len(), 1);
816        assert_eq!(results[0].tool.name, "list_backups");
817    }
818
819    #[test]
820    fn test_list_servers() {
821        let mut registry = DiscoveryRegistry::new();
822        registry.register(sample_spec(), false);
823
824        let servers = registry.list_servers();
825
826        assert_eq!(servers.len(), 1);
827        assert_eq!(servers[0].name, "redisctl");
828        assert_eq!(servers[0].tool_count, 3);
829    }
830
831    #[test]
832    fn test_list_categories() {
833        let mut registry = DiscoveryRegistry::new();
834        registry.register(sample_spec(), false);
835
836        let categories = registry.list_categories();
837
838        assert_eq!(categories.len(), 2);
839        assert!(categories.contains_key("clusters"));
840        assert!(categories.contains_key("backups"));
841        assert_eq!(categories.get("clusters").unwrap().tool_count, 2);
842    }
843
844    #[test]
845    fn test_unregister() {
846        let mut registry = DiscoveryRegistry::new();
847        registry.register(sample_spec(), false);
848
849        assert!(registry.unregister("redisctl"));
850        assert!(registry.list_servers().is_empty());
851        assert!(registry.query("cluster", 10).is_empty());
852    }
853
854    #[test]
855    fn test_replace_registration() {
856        let mut registry = DiscoveryRegistry::new();
857        registry.register(sample_spec(), false);
858
859        // Try to register again without replace - should fail
860        let result = registry.register(sample_spec(), false);
861        assert!(!result.ok);
862
863        // With replace - should succeed
864        let result = registry.register(sample_spec(), true);
865        assert!(result.ok);
866    }
867
868    #[test]
869    fn test_similar_tools() {
870        let mut registry = DiscoveryRegistry::new();
871        registry.register(sample_spec(), false);
872
873        let similar = registry.similar("redisctl:create_cluster", 10);
874
875        // delete_cluster should be similar (shares "cluster" terms)
876        assert!(!similar.is_empty());
877        assert_eq!(similar[0].tool.name, "delete_cluster");
878    }
879
880    #[test]
881    fn test_minimal_spec() {
882        let minimal: DiscoverySpec = serde_json::from_value(serde_json::json!({
883            "server": {"name": "minimal"},
884            "tools": [{"name": "foo"}]
885        }))
886        .unwrap();
887
888        let mut registry = DiscoveryRegistry::new();
889        let result = registry.register(minimal, false);
890
891        assert!(result.ok);
892        assert_eq!(result.tools_indexed, 1);
893    }
894
895    #[test]
896    fn test_get_schema() {
897        let schema = DiscoveryRegistry::get_schema();
898
899        assert!(schema.get("$schema").is_some());
900        assert!(schema.get("properties").is_some());
901    }
902
903    #[test]
904    fn test_index_stats() {
905        let mut registry = DiscoveryRegistry::new();
906        registry.register(sample_spec(), false);
907
908        let stats = registry.index_stats().unwrap();
909
910        assert_eq!(stats.doc_count, 3);
911        assert_eq!(stats.server_count, 1);
912        assert!(stats.term_count > 0);
913    }
914
915    // Preprocessing tests
916
917    #[test]
918    fn test_strip_jmespath_literals() {
919        // Basic backtick literal with JSON string
920        assert!(strip_jmespath_literals(r#"split text on `"\n"` newlines"#).contains("newline"));
921
922        // Backtick with escaped regex
923        let result = strip_jmespath_literals(r#"match `"\\d+"` digits"#);
924        assert!(result.contains("digit"));
925
926        // Multiple backticks
927        let result = strip_jmespath_literals(r#"use `"\t"` for tabs and `"\n"` for lines"#);
928        assert!(result.contains("tab"));
929        assert!(result.contains("newline"));
930
931        // Non-string backtick content preserved
932        let result = strip_jmespath_literals(r#"literal `123` number"#);
933        assert!(result.contains("123"));
934    }
935
936    #[test]
937    fn test_expand_escape_sequences() {
938        assert!(expand_escape_sequences(r"\n").contains("newline"));
939        assert!(expand_escape_sequences(r"\t").contains("tab"));
940        assert!(expand_escape_sequences(r"\d").contains("digit"));
941        assert!(expand_escape_sequences(r"\w").contains("word"));
942        assert!(expand_escape_sequences(r"\s").contains("whitespace"));
943    }
944
945    #[test]
946    fn test_expand_regex_patterns() {
947        assert!(expand_regex_patterns(r"\d+").contains("digits"));
948        assert!(expand_regex_patterns(r"\w+").contains("words"));
949        assert!(expand_regex_patterns(r"[0-9]").contains("digit"));
950        assert!(expand_regex_patterns(r"[a-zA-Z]").contains("letter"));
951        assert!(expand_regex_patterns(r".*").contains("any"));
952
953        // Metacharacters should be stripped
954        let result = expand_regex_patterns(r"foo[bar]+baz");
955        assert!(!result.contains('['));
956        assert!(!result.contains(']'));
957        assert!(!result.contains('+'));
958    }
959
960    #[test]
961    fn test_expand_identifiers() {
962        // snake_case should expand
963        let result = expand_identifiers("get_user_info");
964        assert!(result.contains("get"));
965        assert!(result.contains("user"));
966        assert!(result.contains("info"));
967        // Original preserved for exact match
968        assert!(result.contains("get_user_info"));
969
970        // camelCase should expand
971        let result = expand_identifiers("getUserInfo");
972        assert!(result.contains("get"));
973        assert!(result.contains("user"));
974        assert!(result.contains("info"));
975        // Original preserved
976        assert!(result.contains("getUserInfo"));
977
978        // Simple words unchanged
979        let result = expand_identifiers("simple");
980        assert!(result.contains("simple"));
981    }
982
983    #[test]
984    fn test_preprocess_for_search_integration() {
985        // Full preprocessing pipeline
986        let input = r#"Split on `"\n"` to get lines, use regex_extract for \d+ numbers"#;
987        let result = preprocess_for_search(input);
988
989        // Should contain expanded terms
990        assert!(result.contains("newline") || result.contains("linebreak"));
991        assert!(result.contains("digit") || result.contains("number"));
992        assert!(result.contains("regex"));
993        assert!(result.contains("extract"));
994
995        // Should not have excess whitespace
996        assert!(!result.contains("  "));
997    }
998
999    #[test]
1000    fn test_preprocess_preserves_search_terms() {
1001        // Make sure useful search terms aren't lost
1002        let input = "Create a new database connection";
1003        let result = preprocess_for_search(input);
1004
1005        assert!(result.contains("Create"));
1006        assert!(result.contains("database"));
1007        assert!(result.contains("connection"));
1008    }
1009
1010    #[test]
1011    fn test_search_with_preprocessed_content() {
1012        // Test that preprocessing improves search for escape-heavy descriptions
1013        let spec: DiscoverySpec = serde_json::from_value(serde_json::json!({
1014            "server": {"name": "text-tools"},
1015            "tools": [
1016                {
1017                    "name": "split_lines",
1018                    "summary": r#"Split text on newlines using `"\n"` delimiter"#,
1019                    "description": r#"Splits input string on newline characters. Use split(@, `"\n"`) syntax."#
1020                },
1021                {
1022                    "name": "extract_numbers",
1023                    "summary": r#"Extract numeric patterns with regex `"\\d+"`"#,
1024                    "description": r#"Uses regex_extract to find all \d+ digit sequences in text."#
1025                }
1026            ]
1027        }))
1028        .unwrap();
1029
1030        let mut registry = DiscoveryRegistry::new();
1031        registry.register(spec, false);
1032
1033        // Search for "newline" should find split_lines due to preprocessing
1034        let results = registry.query("newline", 10);
1035        assert!(!results.is_empty());
1036        assert_eq!(results[0].tool.name, "split_lines");
1037
1038        // Search for "digit" should find extract_numbers
1039        let results = registry.query("digit", 10);
1040        assert!(!results.is_empty());
1041        assert_eq!(results[0].tool.name, "extract_numbers");
1042    }
1043}