Skip to main content

jpx_engine/
discovery.rs

1//! Discovery Protocol implementation.
2//!
3//! This module implements a protocol for capability registration and search
4//! across servers. It uses BM25 search indexing for efficient tool discovery.
5//!
6//! # Discovery Spec
7//!
8//! Servers can register their tools using a structured discovery spec:
9//!
10//! ```json
11//! {
12//!   "server": {"name": "my-server", "version": "1.0.0"},
13//!   "tools": [
14//!     {"name": "my_tool", "description": "Does something useful", "tags": ["read"]}
15//!   ]
16//! }
17//! ```
18
19use crate::bm25::{Bm25Index, IndexOptions};
20use serde::{Deserialize, Serialize};
21use serde_json::Value;
22use std::collections::HashMap;
23
24#[cfg(feature = "schema")]
25use schemars::JsonSchema;
26
/// Common English stop words to filter from search indexing.
///
/// These words are too common to be useful for search relevance. The list is
/// handed to the BM25 index as its `stopwords` option every time
/// `DiscoveryRegistry::rebuild_index` builds a new index.
const STOP_WORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is", "it",
    "its", "of", "on", "or", "that", "the", "to", "was", "were", "will", "with", "this", "but",
    "they", "have", "had", "what", "when", "where", "who", "which", "why", "how", "all", "each",
    "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
    "own", "same", "so", "than", "too", "very", "just", "can", "could", "should", "would", "may",
    "might", "must", "shall", "about", "above", "after", "again", "against", "below", "between",
    "into", "through", "during", "before", "under", "over",
];
38
39/// Preprocess text for search indexing.
40///
41/// This function cleans up text before indexing to improve search relevance:
42/// 1. Strips JMESPath literal syntax (backticks, escaped quotes)
43/// 2. Expands common regex patterns to natural language
44/// 3. Converts snake_case to separate words
45/// 4. Removes noise characters
46fn preprocess_for_search(text: &str) -> String {
47    let mut result = text.to_string();
48
49    // Strip JMESPath backtick literals: `"..."` -> ...
50    // This handles patterns like `"\n"` -> newline, `"\\d+"` -> digits
51    result = strip_jmespath_literals(&result);
52
53    // Expand common regex patterns to natural language
54    result = expand_regex_patterns(&result);
55
56    // Convert snake_case and camelCase to separate words
57    result = expand_identifiers(&result);
58
59    // Clean up extra whitespace
60    result.split_whitespace().collect::<Vec<_>>().join(" ")
61}
62
63/// Strip JMESPath backtick literal syntax from text.
64fn strip_jmespath_literals(text: &str) -> String {
65    let mut result = String::with_capacity(text.len());
66    let mut chars = text.chars().peekable();
67
68    while let Some(c) = chars.next() {
69        if c == '`' {
70            // Skip backtick and its contents, but extract meaningful parts
71            let mut inner = String::new();
72            for inner_c in chars.by_ref() {
73                if inner_c == '`' {
74                    break;
75                }
76                inner.push(inner_c);
77            }
78            // Extract content from JSON string if it looks like `"..."`
79            let trimmed = inner.trim();
80            if trimmed.starts_with('"') && trimmed.ends_with('"') {
81                let content = &trimmed[1..trimmed.len() - 1];
82                // Expand escape sequences to words
83                let expanded = expand_escape_sequences(content);
84                result.push(' ');
85                result.push_str(&expanded);
86                result.push(' ');
87            } else {
88                // Just include the inner content
89                result.push(' ');
90                result.push_str(trimmed);
91                result.push(' ');
92            }
93        } else {
94            result.push(c);
95        }
96    }
97
98    result
99}
100
/// Replace backslash escape sequences with descriptive search keywords.
///
/// The substitutions are applied one after another in table order, each over
/// the output of the previous one. Doubled backslashes are blanked out last.
fn expand_escape_sequences(text: &str) -> String {
    const SUBSTITUTIONS: [(&str, &str); 8] = [
        ("\\n", " newline linebreak "),
        ("\\r", " return "),
        ("\\t", " tab "),
        ("\\s", " whitespace space "),
        ("\\d", " digit number numeric "),
        ("\\w", " word alphanumeric "),
        ("\\b", " boundary "),
        ("\\\\", " "),
    ];

    SUBSTITUTIONS
        .iter()
        .fold(text.to_owned(), |acc, &(escape, words)| {
            acc.replace(escape, words)
        })
}
112
/// Rewrite common regex fragments as plain-language keywords.
///
/// Known character classes and quantified shorthands are replaced first, in
/// table order, each over the output of the previous replacement. Whatever
/// regex metacharacters remain afterwards are turned into spaces.
fn expand_regex_patterns(text: &str) -> String {
    const PHRASES: [(&str, &str); 11] = [
        ("[0-9]", " digit number "),
        ("[a-z]", " letter lowercase "),
        ("[A-Z]", " letter uppercase "),
        ("[a-zA-Z]", " letter alphabetic "),
        ("[^>]", " "),
        (".*", " any anything "),
        (".+", " one more any "),
        ("\\d+", " digits numbers numeric "),
        ("\\w+", " words alphanumeric "),
        ("\\s+", " whitespace spaces "),
        ("\\S+", " nonwhitespace "),
    ];

    let mut expanded = text.to_owned();
    for &(pattern, phrase) in PHRASES.iter() {
        expanded = expanded.replace(pattern, phrase);
    }

    // Blank out leftover metacharacters one character at a time.
    expanded
        .chars()
        .map(|ch| match ch {
            '[' | ']' | '(' | ')' | '{' | '}' | '*' | '+' | '?' | '^' | '$' | '|' => ' ',
            other => other,
        })
        .collect()
}
134
/// Add the component words of snake_case and camelCase identifiers.
///
/// The input is processed one whitespace-separated token at a time; every
/// emitted word is followed by a single space (so non-empty output ends with
/// a space):
/// - a token containing `_` yields its non-empty underscore-separated parts,
///   then the token itself;
/// - a token with both upper- and lowercase letters yields its lowercased
///   segments, then the token itself. A new segment starts at an uppercase
///   letter that follows a non-uppercase one, so runs of capitals stay
///   attached to what follows them (`HTTPServer` gives `httpserver`);
/// - any other token is emitted unchanged.
fn expand_identifiers(text: &str) -> String {
    /// Append `word` and a trailing separator space to `out`.
    fn emit(out: &mut String, word: &str) {
        out.push_str(word);
        out.push(' ');
    }

    let mut out = String::with_capacity(text.len() * 2);

    for token in text.split_whitespace() {
        if token.contains('_') {
            // snake_case: parts first, then the original for exact matches.
            for part in token.split('_').filter(|part| !part.is_empty()) {
                emit(&mut out, part);
            }
            emit(&mut out, token);
            continue;
        }

        let has_upper = token.chars().any(char::is_uppercase);
        let has_lower = token.chars().any(char::is_lowercase);
        if has_upper && has_lower {
            // camelCase (basic): cut in front of an uppercase letter that
            // follows a non-uppercase one.
            let mut segment = String::new();
            let mut after_upper = false;
            for ch in token.chars() {
                let upper = ch.is_uppercase();
                if upper && !after_upper && !segment.is_empty() {
                    emit(&mut out, &segment.to_lowercase());
                    segment.clear();
                }
                segment.push(ch);
                after_upper = upper;
            }
            if !segment.is_empty() {
                emit(&mut out, &segment.to_lowercase());
            }
        }

        // Plain tokens, and the original spelling of camelCase tokens.
        emit(&mut out, token);
    }

    out
}
181
/// Discovery spec - the schema MCP servers use to register their tools.
///
/// This is the top-level payload accepted by `DiscoveryRegistry::register`.
/// `server` and `tools` have no serde default; `categories` falls back to an
/// empty map when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct DiscoverySpec {
    /// JSON Schema reference (optional).
    ///
    /// Serialized under the key `$schema`; left out of the output when `None`.
    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
    pub schema: Option<String>,

    /// Server metadata. Its `name` is the key the registry stores the spec under.
    pub server: ServerInfo,

    /// List of tools provided by this server
    pub tools: Vec<ToolSpec>,

    /// Category definitions (optional), keyed by category name.
    ///
    /// Left out of the serialized output when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub categories: HashMap<String, CategoryInfo>,
}
200
/// Server metadata.
///
/// Optional fields are left out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ServerInfo {
    /// Server name (required).
    ///
    /// Used as the registry key for the server and as the prefix of every
    /// tool id (`"<server name>:<tool name>"`).
    pub name: String,

    /// Server version (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Server description (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
216
/// Tool specification.
///
/// Only `name` is required. List fields default to empty and, like the
/// optional fields, are left out of the serialized output when empty/`None`.
/// The search index is built from `name`, `aliases`, `category`, `tags`,
/// `summary`, `description`, the parameter names, and the example descriptions.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ToolSpec {
    /// Tool name (required). Combined with the server name to form the tool id.
    pub name: String,

    /// Alternative names/aliases
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,

    /// Primary category. Tools without one are not counted by `list_categories`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub category: Option<String>,

    /// Subcategory within the primary category
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subcategory: Option<String>,

    /// Tags for filtering and search
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Short summary (for search results)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,

    /// Full description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Parameter definitions
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub params: Vec<ParamSpec>,

    /// Return type information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub returns: Option<ReturnSpec>,

    /// Usage examples
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub examples: Vec<ExampleSpec>,

    /// Related tools (author-declared relationships)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub related: Vec<String>,

    /// Version when tool was added
    #[serde(skip_serializing_if = "Option::is_none")]
    pub since: Option<String>,

    /// Stability level (stable, beta, deprecated).
    ///
    /// Stored as a free-form string; this type does not restrict the value,
    /// although the JSON schema from `get_schema` lists those three values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stability: Option<String>,
}
272
/// Parameter specification.
///
/// Only `name` is required; of these fields, only `name` is fed into the
/// search index.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ParamSpec {
    /// Parameter name
    pub name: String,

    /// Parameter type (string, number, boolean, object, array).
    ///
    /// Serialized under the key `type`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub param_type: Option<String>,

    /// Whether parameter is required. Defaults to `false` when absent.
    #[serde(default)]
    pub required: bool,

    /// Parameter description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Allowed values (for enums).
    ///
    /// Serialized under the key `enum`.
    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
    pub enum_values: Option<Vec<String>>,

    /// Default value (any JSON value)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
}
300
/// Return type specification.
///
/// Both fields are optional and left out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ReturnSpec {
    /// Return type. Serialized under the key `type`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub return_type: Option<String>,

    /// Description of return value
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
313
/// Example specification.
///
/// All fields are optional and left out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ExampleSpec {
    /// Example description. This is the only example field added to the
    /// search index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Example arguments (any JSON value)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub args: Option<Value>,

    /// Expected result (optional, any JSON value)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result: Option<Value>,
}
330
/// Category information.
///
/// Appears as a value in `DiscoverySpec::categories`, keyed by category name.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct CategoryInfo {
    /// Category description
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Subcategories. Defaults to empty; left out of the output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub subcategories: Vec<String>,
}
343
/// Discovery registry - holds registered specs and search index.
///
/// `register` and `unregister` keep the three fields in step: both update
/// `servers` and `tools` and then rebuild `index`.
#[derive(Debug)]
pub struct DiscoveryRegistry {
    /// Registered servers: name -> spec
    servers: HashMap<String, DiscoverySpec>,

    /// All tools flattened for indexing: tool_id -> (server_name, tool_spec).
    ///
    /// A tool id has the form `"<server name>:<tool name>"`.
    tools: HashMap<String, (String, ToolSpec)>,

    /// BM25 search index (rebuilt on registration changes).
    ///
    /// `None` while no tools are registered.
    index: Option<Bm25Index>,
}
356
357impl Default for DiscoveryRegistry {
358    fn default() -> Self {
359        Self::new()
360    }
361}
362
363impl DiscoveryRegistry {
364    /// Create a new empty registry
365    pub fn new() -> Self {
366        Self {
367            servers: HashMap::new(),
368            tools: HashMap::new(),
369            index: None,
370        }
371    }
372
373    /// Register a discovery spec
374    pub fn register(&mut self, spec: DiscoverySpec, replace: bool) -> RegistrationResult {
375        let server_name = spec.server.name.clone();
376
377        // Check if server already registered
378        if self.servers.contains_key(&server_name) && !replace {
379            return RegistrationResult {
380                ok: false,
381                tools_indexed: 0,
382                warnings: vec![format!(
383                    "Server '{}' already registered. Use replace=true to update.",
384                    server_name
385                )],
386            };
387        }
388
389        // Remove old tools from this server if replacing
390        if replace {
391            self.tools.retain(|_, (srv, _)| srv != &server_name);
392        }
393
394        // Add new tools
395        let mut warnings = Vec::new();
396        let mut tools_added = 0;
397
398        for tool in &spec.tools {
399            let tool_id = format!("{}:{}", server_name, tool.name);
400
401            if self.tools.contains_key(&tool_id) && !replace {
402                warnings.push(format!("Tool '{}' already exists, skipping", tool_id));
403                continue;
404            }
405
406            self.tools
407                .insert(tool_id, (server_name.clone(), tool.clone()));
408            tools_added += 1;
409        }
410
411        // Store the spec
412        self.servers.insert(server_name, spec);
413
414        // Rebuild the search index
415        self.rebuild_index();
416
417        RegistrationResult {
418            ok: true,
419            tools_indexed: tools_added,
420            warnings,
421        }
422    }
423
424    /// Unregister a server
425    pub fn unregister(&mut self, server_name: &str) -> bool {
426        if self.servers.remove(server_name).is_some() {
427            self.tools.retain(|_, (srv, _)| srv != server_name);
428            self.rebuild_index();
429            true
430        } else {
431            false
432        }
433    }
434
435    /// Rebuild the BM25 search index from all registered tools
436    fn rebuild_index(&mut self) {
437        if self.tools.is_empty() {
438            self.index = None;
439            return;
440        }
441
442        // Convert tools to indexable documents with preprocessed text
443        let docs: Vec<Value> = self
444            .tools
445            .iter()
446            .map(|(id, (server, tool))| {
447                let summary = tool.summary.as_deref().unwrap_or("");
448                let description = tool.description.as_deref().unwrap_or("");
449
450                // Preprocess text fields for better search
451                let expanded_summary = preprocess_for_search(summary);
452                let expanded_description = preprocess_for_search(description);
453
454                // Also preprocess examples for searchable content
455                let examples_text: String = tool
456                    .examples
457                    .iter()
458                    .filter_map(|ex| ex.description.as_ref())
459                    .map(|d| preprocess_for_search(d))
460                    .collect::<Vec<_>>()
461                    .join(" ");
462
463                serde_json::json!({
464                    "id": id,
465                    "server": server,
466                    "name": tool.name,
467                    "aliases": tool.aliases.join(" "),
468                    "category": tool.category.as_deref().unwrap_or(""),
469                    "tags": tool.tags.join(" "),
470                    "summary": summary,
471                    "description": description,
472                    "params": tool.params.iter().map(|p| p.name.as_str()).collect::<Vec<_>>().join(" "),
473                    // Expanded fields for better search
474                    "expanded_summary": expanded_summary,
475                    "expanded_description": expanded_description,
476                    "expanded_examples": examples_text,
477                })
478            })
479            .collect();
480
481        let options = IndexOptions {
482            fields: vec![
483                "name".to_string(),
484                "aliases".to_string(),
485                "category".to_string(),
486                "tags".to_string(),
487                "summary".to_string(),
488                "description".to_string(),
489                "params".to_string(),
490                // Include expanded fields in search
491                "expanded_summary".to_string(),
492                "expanded_description".to_string(),
493                "expanded_examples".to_string(),
494            ],
495            id_field: Some("id".to_string()),
496            stopwords: STOP_WORDS.iter().map(|s| s.to_string()).collect(),
497            ..Default::default()
498        };
499
500        self.index = Some(Bm25Index::build(&docs, options));
501    }
502
503    /// Query tools across all registered servers
504    pub fn query(&self, query: &str, top_k: usize) -> Vec<ToolQueryResult> {
505        let Some(index) = &self.index else {
506            return Vec::new();
507        };
508
509        let results = index.search(query, top_k);
510
511        results
512            .into_iter()
513            .filter_map(|r| {
514                let (server, tool) = self.tools.get(&r.id)?;
515                Some(ToolQueryResult {
516                    id: r.id,
517                    server: server.clone(),
518                    tool: tool.clone(),
519                    score: r.score,
520                    matches: r.matches,
521                })
522            })
523            .collect()
524    }
525
526    /// Find tools similar to a given tool
527    pub fn similar(&self, tool_id: &str, top_k: usize) -> Vec<ToolQueryResult> {
528        let Some(index) = &self.index else {
529            return Vec::new();
530        };
531
532        let results = index.similar(tool_id, top_k);
533
534        results
535            .into_iter()
536            .filter_map(|r| {
537                let (server, tool) = self.tools.get(&r.id)?;
538                Some(ToolQueryResult {
539                    id: r.id,
540                    server: server.clone(),
541                    tool: tool.clone(),
542                    score: r.score,
543                    matches: r.matches,
544                })
545            })
546            .collect()
547    }
548
549    /// List all registered servers
550    pub fn list_servers(&self) -> Vec<ServerSummary> {
551        self.servers
552            .iter()
553            .map(|(name, spec)| ServerSummary {
554                name: name.clone(),
555                version: spec.server.version.clone(),
556                description: spec.server.description.clone(),
557                tool_count: spec.tools.len(),
558            })
559            .collect()
560    }
561
562    /// List all categories across all servers
563    pub fn list_categories(&self) -> HashMap<String, CategorySummary> {
564        let mut categories: HashMap<String, CategorySummary> = HashMap::new();
565
566        for (server, tool) in self.tools.values() {
567            if let Some(cat) = &tool.category {
568                let entry = categories.entry(cat.clone()).or_insert(CategorySummary {
569                    name: cat.clone(),
570                    tool_count: 0,
571                    servers: Vec::new(),
572                    subcategories: Vec::new(),
573                });
574                entry.tool_count += 1;
575                if !entry.servers.contains(server) {
576                    entry.servers.push(server.clone());
577                }
578                if let Some(subcat) = tool
579                    .subcategory
580                    .as_ref()
581                    .filter(|s| !entry.subcategories.contains(s))
582                {
583                    entry.subcategories.push(subcat.clone());
584                }
585            }
586        }
587
588        categories
589    }
590
591    /// Get index statistics
592    pub fn index_stats(&self) -> Option<IndexStats> {
593        let index = self.index.as_ref()?;
594
595        Some(IndexStats {
596            doc_count: index.doc_count,
597            term_count: index.terms.len(),
598            avg_doc_length: index.avg_doc_length,
599            server_count: self.servers.len(),
600            top_terms: index.terms().into_iter().take(20).collect(),
601        })
602    }
603
    /// Get the discovery schema as JSON.
    ///
    /// Returns a hand-written JSON Schema (draft-07) document describing the
    /// registration payload: a required `server` object (with a required
    /// `name`), a required `tools` array (each tool with a required `name`),
    /// and an optional `categories` object. This is an associated function;
    /// it does not read any registry state.
    ///
    /// The schema is stricter than the Rust types in one place: it restricts
    /// a tool's `stability` to `stable`, `beta`, or `deprecated`, whereas
    /// `ToolSpec::stability` accepts any string.
    pub fn get_schema() -> Value {
        serde_json::json!({
            "$schema": "http://json-schema.org/draft-07/schema#",
            "$id": "https://jpx.dev/schemas/mcp-discovery/v1.json",
            "title": "MCP Discovery Spec",
            "description": "Schema for registering MCP server capabilities with jpx",
            "type": "object",
            "required": ["server", "tools"],
            "properties": {
                "$schema": {
                    "type": "string",
                    "description": "JSON Schema reference"
                },
                "server": {
                    "type": "object",
                    "required": ["name"],
                    "properties": {
                        "name": {"type": "string", "description": "Server name"},
                        "version": {"type": "string", "description": "Server version"},
                        "description": {"type": "string", "description": "Server description"}
                    }
                },
                "tools": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["name"],
                        "properties": {
                            "name": {"type": "string", "description": "Tool name"},
                            "aliases": {"type": "array", "items": {"type": "string"}},
                            "category": {"type": "string"},
                            "subcategory": {"type": "string"},
                            "tags": {"type": "array", "items": {"type": "string"}},
                            "summary": {"type": "string", "description": "Short summary"},
                            "description": {"type": "string", "description": "Full description"},
                            "params": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "required": ["name"],
                                    "properties": {
                                        "name": {"type": "string"},
                                        "type": {"type": "string"},
                                        "required": {"type": "boolean"},
                                        "description": {"type": "string"},
                                        "enum": {"type": "array", "items": {"type": "string"}},
                                        // An empty schema: the default may be any JSON value.
                                        "default": {}
                                    }
                                }
                            },
                            "returns": {
                                "type": "object",
                                "properties": {
                                    "type": {"type": "string"},
                                    "description": {"type": "string"}
                                }
                            },
                            "examples": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "description": {"type": "string"},
                                        // Empty schemas: args and result may be any JSON value.
                                        "args": {},
                                        "result": {}
                                    }
                                }
                            },
                            "related": {"type": "array", "items": {"type": "string"}},
                            "since": {"type": "string"},
                            "stability": {"type": "string", "enum": ["stable", "beta", "deprecated"]}
                        }
                    }
                },
                "categories": {
                    "type": "object",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "description": {"type": "string"},
                            "subcategories": {"type": "array", "items": {"type": "string"}}
                        }
                    }
                }
            }
        })
    }
692}
693
/// Result of registering a discovery spec.
///
/// Returned by [`DiscoveryRegistry::register`] to indicate success and any issues.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegistrationResult {
    /// Whether the registration succeeded.
    ///
    /// `false` when the server name was already registered and `replace` was
    /// not requested; in that case nothing was changed.
    pub ok: bool,
    /// Number of tools that were indexed
    pub tools_indexed: usize,
    /// Any warnings encountered during registration (e.g., duplicate tools).
    ///
    /// A rejected registration carries its explanation here.
    pub warnings: Vec<String>,
}
706
/// Result from querying tools across registered servers.
///
/// Contains the matched tool along with relevance scoring and match details.
/// Produced by both `DiscoveryRegistry::query` and `DiscoveryRegistry::similar`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolQueryResult {
    /// Unique tool identifier in format "server:tool_name"
    pub id: String,
    /// Name of the server providing this tool
    pub server: String,
    /// The tool specification (a copy of the registered spec)
    pub tool: ToolSpec,
    /// BM25 relevance score (higher = better match), as reported by the index
    pub score: f64,
    /// Fields that matched the query, with matched terms
    pub matches: HashMap<String, Vec<String>>,
}
723
/// Summary information about a registered server.
///
/// Used when listing all registered discovery servers
/// (see `DiscoveryRegistry::list_servers`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerSummary {
    /// Server name (unique identifier)
    pub name: String,
    /// Server version, if provided
    pub version: Option<String>,
    /// Server description, if provided
    pub description: Option<String>,
    /// Number of tools registered by this server.
    ///
    /// Taken from the length of the tool list in the server's stored spec.
    pub tool_count: usize,
}
738
/// Summary information about a tool category.
///
/// Aggregates category data across all registered servers
/// (see `DiscoveryRegistry::list_categories`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorySummary {
    /// Category name
    pub name: String,
    /// Total number of tools in this category across all servers
    pub tool_count: usize,
    /// Names of servers that have tools in this category (each listed once)
    pub servers: Vec<String>,
    /// Subcategories within this category (each listed once), collected from
    /// the tools' `subcategory` fields
    pub subcategories: Vec<String>,
}
753
/// Statistics about the discovery search index.
///
/// Provides insight into what has been indexed for debugging and monitoring.
/// Built by `DiscoveryRegistry::index_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStats {
    /// Number of documents (tools) in the index
    pub doc_count: usize,
    /// Number of unique terms in the index
    pub term_count: usize,
    /// Average document length (in terms)
    pub avg_doc_length: f64,
    /// Number of registered servers
    pub server_count: usize,
    /// Most frequent terms in the index with their counts.
    ///
    /// Holds at most 20 entries: the first 20 returned by the index's
    /// `terms()`. NOTE(review): the frequency ordering depends on `terms()`
    /// itself - confirm against the BM25 index implementation.
    pub top_terms: Vec<(String, usize)>,
}
770
771#[cfg(test)]
772mod tests {
773    use super::*;
774
775    fn sample_spec() -> DiscoverySpec {
776        serde_json::from_value(serde_json::json!({
777            "server": {
778                "name": "redisctl",
779                "version": "0.5.0",
780                "description": "Redis Enterprise management"
781            },
782            "tools": [
783                {
784                    "name": "create_cluster",
785                    "category": "clusters",
786                    "tags": ["write", "provisioning"],
787                    "summary": "Create a new Redis cluster",
788                    "description": "Creates a new Redis Enterprise cluster with specified configuration"
789                },
790                {
791                    "name": "delete_cluster",
792                    "category": "clusters",
793                    "tags": ["write", "destructive"],
794                    "summary": "Delete a cluster",
795                    "description": "Permanently deletes a Redis cluster"
796                },
797                {
798                    "name": "list_backups",
799                    "category": "backups",
800                    "tags": ["read"],
801                    "summary": "List all backups",
802                    "description": "Lists all available backups for a cluster"
803                }
804            ]
805        })).unwrap()
806    }
807
808    #[test]
809    fn test_register_spec() {
810        let mut registry = DiscoveryRegistry::new();
811        let spec = sample_spec();
812
813        let result = registry.register(spec, false);
814
815        assert!(result.ok);
816        assert_eq!(result.tools_indexed, 3);
817        assert!(result.warnings.is_empty());
818    }
819
820    #[test]
821    fn test_query_tools() {
822        let mut registry = DiscoveryRegistry::new();
823        registry.register(sample_spec(), false);
824
825        let results = registry.query("cluster", 10);
826
827        // All tools mention cluster in their descriptions, but cluster tools rank higher
828        assert!(!results.is_empty());
829        // Top results should be the cluster tools (they have "cluster" in name)
830        let top_names: Vec<_> = results
831            .iter()
832            .take(2)
833            .map(|r| r.tool.name.as_str())
834            .collect();
835        assert!(top_names.contains(&"create_cluster"));
836        assert!(top_names.contains(&"delete_cluster"));
837    }
838
839    #[test]
840    fn test_query_by_tag() {
841        let mut registry = DiscoveryRegistry::new();
842        registry.register(sample_spec(), false);
843
844        let results = registry.query("read", 10);
845
846        assert_eq!(results.len(), 1);
847        assert_eq!(results[0].tool.name, "list_backups");
848    }
849
850    #[test]
851    fn test_list_servers() {
852        let mut registry = DiscoveryRegistry::new();
853        registry.register(sample_spec(), false);
854
855        let servers = registry.list_servers();
856
857        assert_eq!(servers.len(), 1);
858        assert_eq!(servers[0].name, "redisctl");
859        assert_eq!(servers[0].tool_count, 3);
860    }
861
862    #[test]
863    fn test_list_categories() {
864        let mut registry = DiscoveryRegistry::new();
865        registry.register(sample_spec(), false);
866
867        let categories = registry.list_categories();
868
869        assert_eq!(categories.len(), 2);
870        assert!(categories.contains_key("clusters"));
871        assert!(categories.contains_key("backups"));
872        assert_eq!(categories.get("clusters").unwrap().tool_count, 2);
873    }
874
875    #[test]
876    fn test_unregister() {
877        let mut registry = DiscoveryRegistry::new();
878        registry.register(sample_spec(), false);
879
880        assert!(registry.unregister("redisctl"));
881        assert!(registry.list_servers().is_empty());
882        assert!(registry.query("cluster", 10).is_empty());
883    }
884
885    #[test]
886    fn test_replace_registration() {
887        let mut registry = DiscoveryRegistry::new();
888        registry.register(sample_spec(), false);
889
890        // Try to register again without replace - should fail
891        let result = registry.register(sample_spec(), false);
892        assert!(!result.ok);
893
894        // With replace - should succeed
895        let result = registry.register(sample_spec(), true);
896        assert!(result.ok);
897    }
898
899    #[test]
900    fn test_similar_tools() {
901        let mut registry = DiscoveryRegistry::new();
902        registry.register(sample_spec(), false);
903
904        let similar = registry.similar("redisctl:create_cluster", 10);
905
906        // delete_cluster should be similar (shares "cluster" terms)
907        assert!(!similar.is_empty());
908        assert_eq!(similar[0].tool.name, "delete_cluster");
909    }
910
911    #[test]
912    fn test_minimal_spec() {
913        let minimal: DiscoverySpec = serde_json::from_value(serde_json::json!({
914            "server": {"name": "minimal"},
915            "tools": [{"name": "foo"}]
916        }))
917        .unwrap();
918
919        let mut registry = DiscoveryRegistry::new();
920        let result = registry.register(minimal, false);
921
922        assert!(result.ok);
923        assert_eq!(result.tools_indexed, 1);
924    }
925
926    #[test]
927    fn test_get_schema() {
928        let schema = DiscoveryRegistry::get_schema();
929
930        assert!(schema.get("$schema").is_some());
931        assert!(schema.get("properties").is_some());
932    }
933
934    #[test]
935    fn test_index_stats() {
936        let mut registry = DiscoveryRegistry::new();
937        registry.register(sample_spec(), false);
938
939        let stats = registry.index_stats().unwrap();
940
941        assert_eq!(stats.doc_count, 3);
942        assert_eq!(stats.server_count, 1);
943        assert!(stats.term_count > 0);
944    }
945
946    // Preprocessing tests
947
948    #[test]
949    fn test_strip_jmespath_literals() {
950        // Basic backtick literal with JSON string
951        assert!(strip_jmespath_literals(r#"split text on `"\n"` newlines"#).contains("newline"));
952
953        // Backtick with escaped regex
954        let result = strip_jmespath_literals(r#"match `"\\d+"` digits"#);
955        assert!(result.contains("digit"));
956
957        // Multiple backticks
958        let result = strip_jmespath_literals(r#"use `"\t"` for tabs and `"\n"` for lines"#);
959        assert!(result.contains("tab"));
960        assert!(result.contains("newline"));
961
962        // Non-string backtick content preserved
963        let result = strip_jmespath_literals(r#"literal `123` number"#);
964        assert!(result.contains("123"));
965    }
966
967    #[test]
968    fn test_expand_escape_sequences() {
969        assert!(expand_escape_sequences(r"\n").contains("newline"));
970        assert!(expand_escape_sequences(r"\t").contains("tab"));
971        assert!(expand_escape_sequences(r"\d").contains("digit"));
972        assert!(expand_escape_sequences(r"\w").contains("word"));
973        assert!(expand_escape_sequences(r"\s").contains("whitespace"));
974    }
975
976    #[test]
977    fn test_expand_regex_patterns() {
978        assert!(expand_regex_patterns(r"\d+").contains("digits"));
979        assert!(expand_regex_patterns(r"\w+").contains("words"));
980        assert!(expand_regex_patterns(r"[0-9]").contains("digit"));
981        assert!(expand_regex_patterns(r"[a-zA-Z]").contains("letter"));
982        assert!(expand_regex_patterns(r".*").contains("any"));
983
984        // Metacharacters should be stripped
985        let result = expand_regex_patterns(r"foo[bar]+baz");
986        assert!(!result.contains('['));
987        assert!(!result.contains(']'));
988        assert!(!result.contains('+'));
989    }
990
991    #[test]
992    fn test_expand_identifiers() {
993        // snake_case should expand
994        let result = expand_identifiers("get_user_info");
995        assert!(result.contains("get"));
996        assert!(result.contains("user"));
997        assert!(result.contains("info"));
998        // Original preserved for exact match
999        assert!(result.contains("get_user_info"));
1000
1001        // camelCase should expand
1002        let result = expand_identifiers("getUserInfo");
1003        assert!(result.contains("get"));
1004        assert!(result.contains("user"));
1005        assert!(result.contains("info"));
1006        // Original preserved
1007        assert!(result.contains("getUserInfo"));
1008
1009        // Simple words unchanged
1010        let result = expand_identifiers("simple");
1011        assert!(result.contains("simple"));
1012    }
1013
1014    #[test]
1015    fn test_preprocess_for_search_integration() {
1016        // Full preprocessing pipeline
1017        let input = r#"Split on `"\n"` to get lines, use regex_extract for \d+ numbers"#;
1018        let result = preprocess_for_search(input);
1019
1020        // Should contain expanded terms
1021        assert!(result.contains("newline") || result.contains("linebreak"));
1022        assert!(result.contains("digit") || result.contains("number"));
1023        assert!(result.contains("regex"));
1024        assert!(result.contains("extract"));
1025
1026        // Should not have excess whitespace
1027        assert!(!result.contains("  "));
1028    }
1029
1030    #[test]
1031    fn test_preprocess_preserves_search_terms() {
1032        // Make sure useful search terms aren't lost
1033        let input = "Create a new database connection";
1034        let result = preprocess_for_search(input);
1035
1036        assert!(result.contains("Create"));
1037        assert!(result.contains("database"));
1038        assert!(result.contains("connection"));
1039    }
1040
1041    #[test]
1042    fn test_search_with_preprocessed_content() {
1043        // Test that preprocessing improves search for escape-heavy descriptions
1044        let spec: DiscoverySpec = serde_json::from_value(serde_json::json!({
1045            "server": {"name": "text-tools"},
1046            "tools": [
1047                {
1048                    "name": "split_lines",
1049                    "summary": r#"Split text on newlines using `"\n"` delimiter"#,
1050                    "description": r#"Splits input string on newline characters. Use split(@, `"\n"`) syntax."#
1051                },
1052                {
1053                    "name": "extract_numbers",
1054                    "summary": r#"Extract numeric patterns with regex `"\\d+"`"#,
1055                    "description": r#"Uses regex_extract to find all \d+ digit sequences in text."#
1056                }
1057            ]
1058        }))
1059        .unwrap();
1060
1061        let mut registry = DiscoveryRegistry::new();
1062        registry.register(spec, false);
1063
1064        // Search for "newline" should find split_lines due to preprocessing
1065        let results = registry.query("newline", 10);
1066        assert!(!results.is_empty());
1067        assert_eq!(results[0].tool.name, "split_lines");
1068
1069        // Search for "digit" should find extract_numbers
1070        let results = registry.query("digit", 10);
1071        assert!(!results.is_empty());
1072        assert_eq!(results[0].tool.name, "extract_numbers");
1073    }
1074
1075    #[test]
1076    fn test_register_duplicate_tool_names() {
1077        let mut registry = DiscoveryRegistry::new();
1078
1079        let spec_a: DiscoverySpec = serde_json::from_value(serde_json::json!({
1080            "server": {"name": "server-a"},
1081            "tools": [{"name": "do_thing", "summary": "Does a thing from server A"}]
1082        }))
1083        .unwrap();
1084
1085        let spec_b: DiscoverySpec = serde_json::from_value(serde_json::json!({
1086            "server": {"name": "server-b"},
1087            "tools": [{"name": "do_thing", "summary": "Does a thing from server B"}]
1088        }))
1089        .unwrap();
1090
1091        let result_a = registry.register(spec_a, false);
1092        let result_b = registry.register(spec_b, false);
1093
1094        assert!(result_a.ok);
1095        assert!(result_b.ok);
1096        assert_eq!(result_a.tools_indexed, 1);
1097        assert_eq!(result_b.tools_indexed, 1);
1098
1099        // Both should be indexed under their unique tool_id ("server:name")
1100        assert!(registry.tools.contains_key("server-a:do_thing"));
1101        assert!(registry.tools.contains_key("server-b:do_thing"));
1102
1103        // Query should return results from both
1104        let results = registry.query("do_thing", 10);
1105        assert_eq!(results.len(), 2);
1106
1107        let servers: Vec<_> = results.iter().map(|r| r.server.as_str()).collect();
1108        assert!(servers.contains(&"server-a"));
1109        assert!(servers.contains(&"server-b"));
1110    }
1111
1112    #[test]
1113    fn test_query_no_results() {
1114        let mut registry = DiscoveryRegistry::new();
1115        registry.register(sample_spec(), false);
1116
1117        let results = registry.query("xyznonexistent", 10);
1118        assert!(results.is_empty());
1119    }
1120
1121    #[test]
1122    fn test_query_empty_registry() {
1123        let registry = DiscoveryRegistry::new();
1124
1125        let results = registry.query("cluster", 10);
1126        assert!(results.is_empty());
1127    }
1128
1129    #[test]
1130    fn test_index_stats_empty_registry() {
1131        let registry = DiscoveryRegistry::new();
1132
1133        assert!(registry.index_stats().is_none());
1134    }
1135
1136    #[test]
1137    fn test_category_filtering_edge_case() {
1138        let spec: DiscoverySpec = serde_json::from_value(serde_json::json!({
1139            "server": {"name": "mixed-server"},
1140            "tools": [
1141                {
1142                    "name": "categorized_tool",
1143                    "category": "utils",
1144                    "summary": "A tool with a category"
1145                },
1146                {
1147                    "name": "uncategorized_tool",
1148                    "summary": "A tool without a category"
1149                }
1150            ]
1151        }))
1152        .unwrap();
1153
1154        let mut registry = DiscoveryRegistry::new();
1155        registry.register(spec, false);
1156
1157        let categories = registry.list_categories();
1158
1159        // Only "utils" should appear; uncategorized tool should not create an entry
1160        assert_eq!(categories.len(), 1);
1161        assert!(categories.contains_key("utils"));
1162        assert_eq!(categories.get("utils").unwrap().tool_count, 1);
1163    }
1164
1165    #[test]
1166    fn test_unregister_nonexistent() {
1167        let mut registry = DiscoveryRegistry::new();
1168
1169        assert!(!registry.unregister("never-registered"));
1170    }
1171
1172    #[test]
1173    fn test_multiple_servers() {
1174        let mut registry = DiscoveryRegistry::new();
1175
1176        let spec_redis = sample_spec();
1177
1178        let spec_postgres: DiscoverySpec = serde_json::from_value(serde_json::json!({
1179            "server": {
1180                "name": "pgctl",
1181                "version": "1.0.0",
1182                "description": "PostgreSQL management"
1183            },
1184            "tools": [
1185                {
1186                    "name": "create_database",
1187                    "category": "databases",
1188                    "tags": ["write"],
1189                    "summary": "Create a new PostgreSQL database",
1190                    "description": "Creates a new PostgreSQL database with specified configuration"
1191                },
1192                {
1193                    "name": "list_tables",
1194                    "category": "tables",
1195                    "tags": ["read"],
1196                    "summary": "List all tables in a database",
1197                    "description": "Lists all tables in a PostgreSQL database"
1198                }
1199            ]
1200        }))
1201        .unwrap();
1202
1203        registry.register(spec_redis, false);
1204        registry.register(spec_postgres, false);
1205
1206        // list_servers should show both
1207        let servers = registry.list_servers();
1208        assert_eq!(servers.len(), 2);
1209        let server_names: Vec<_> = servers.iter().map(|s| s.name.as_str()).collect();
1210        assert!(server_names.contains(&"redisctl"));
1211        assert!(server_names.contains(&"pgctl"));
1212
1213        // Query for "create" should find tools from both servers
1214        let results = registry.query("create", 10);
1215        assert!(results.len() >= 2);
1216        let result_servers: Vec<_> = results.iter().map(|r| r.server.as_str()).collect();
1217        assert!(result_servers.contains(&"redisctl"));
1218        assert!(result_servers.contains(&"pgctl"));
1219
1220        // Query for "PostgreSQL" should only find pgctl tools
1221        let results = registry.query("PostgreSQL", 10);
1222        assert!(!results.is_empty());
1223        assert!(results.iter().all(|r| r.server == "pgctl"));
1224    }
1225}