1use crate::bm25::{Bm25Index, IndexOptions};
20use serde::{Deserialize, Serialize};
21use serde_json::Value;
22use std::collections::HashMap;
23
24#[cfg(feature = "schema")]
25use schemars::JsonSchema;
26
/// Common English function words handed to the BM25 index as its stop-word
/// list whenever the registry rebuilds the index.
///
/// NOTE(review): how the index applies stop words (presumably by dropping
/// them from documents and queries) is decided by `Bm25Index` — confirm there.
const STOP_WORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is", "it",
    "its", "of", "on", "or", "that", "the", "to", "was", "were", "will", "with", "this", "but",
    "they", "have", "had", "what", "when", "where", "who", "which", "why", "how", "all", "each",
    "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
    "own", "same", "so", "than", "too", "very", "just", "can", "could", "should", "would", "may",
    "might", "must", "shall", "about", "above", "after", "again", "against", "below", "between",
    "into", "through", "during", "before", "under", "over",
];
38
39fn preprocess_for_search(text: &str) -> String {
47 let mut result = text.to_string();
48
49 result = strip_jmespath_literals(&result);
52
53 result = expand_regex_patterns(&result);
55
56 result = expand_identifiers(&result);
58
59 result.split_whitespace().collect::<Vec<_>>().join(" ")
61}
62
/// Replaces backtick-delimited JMESPath literals in `text` with plain words.
///
/// Everything outside backticks is copied through unchanged. For each
/// `` `...` `` span the backticks are dropped, the content is trimmed and then
/// emitted surrounded by single spaces:
///
/// * if the content is wrapped in double quotes (a string literal), the quotes
///   are removed and escape sequences are expanded via
///   [`expand_escape_sequences`];
/// * otherwise the trimmed content is emitted as-is.
///
/// An unterminated literal (no closing backtick) runs to the end of the input.
/// A literal consisting of a single `"` is treated as non-string content
/// rather than as an empty quoted string.
fn strip_jmespath_literals(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut chars = text.chars();

    while let Some(c) = chars.next() {
        if c != '`' {
            result.push(c);
            continue;
        }

        // `take_while` also consumes the closing backtick, so the outer loop
        // resumes right after the literal.
        let inner: String = chars.by_ref().take_while(|&ch| ch != '`').collect();
        let trimmed = inner.trim();

        result.push(' ');
        // Requiring BOTH a leading and a trailing quote to be strippable means
        // a lone `"` cannot be mistaken for a quoted string (slicing it by
        // index would produce an inverted range and panic).
        match trimmed
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
        {
            Some(content) => result.push_str(&expand_escape_sequences(content)),
            None => result.push_str(trimmed),
        }
        result.push(' ');
    }

    result
}

/// Replaces textual escape sequences (a backslash followed by a letter, as
/// written in source text — not actual control characters) with descriptive
/// search words.
///
/// Replacements are applied sequentially in the order listed; the
/// double-backslash rule runs last, so `\\n` is first seen as a backslash
/// followed by `\n`.
fn expand_escape_sequences(text: &str) -> String {
    text.replace("\\n", " newline linebreak ")
        .replace("\\r", " return ")
        .replace("\\t", " tab ")
        .replace("\\s", " whitespace space ")
        .replace("\\d", " digit number numeric ")
        .replace("\\w", " word alphanumeric ")
        .replace("\\b", " boundary ")
        .replace("\\\\", " ")
}
112
/// Replaces common regex fragments with descriptive words and blanks out the
/// remaining regex metacharacters.
///
/// The multi-character rewrites are applied one after another in table order;
/// only afterwards is every leftover `[ ] ( ) { } * + ? ^ $ |` turned into a
/// space. The order matters: stripping the metacharacters first would destroy
/// the bracket and quantifier patterns the table looks for.
fn expand_regex_patterns(text: &str) -> String {
    const REWRITES: [(&str, &str); 11] = [
        ("[0-9]", " digit number "),
        ("[a-z]", " letter lowercase "),
        ("[A-Z]", " letter uppercase "),
        ("[a-zA-Z]", " letter alphabetic "),
        ("[^>]", " "),
        (".*", " any anything "),
        (".+", " one more any "),
        ("\\d+", " digits numbers numeric "),
        ("\\w+", " words alphanumeric "),
        ("\\s+", " whitespace spaces "),
        ("\\S+", " nonwhitespace "),
    ];

    let mut expanded = text.to_owned();
    for &(pattern, replacement) in REWRITES.iter() {
        expanded = expanded.replace(pattern, replacement);
    }

    // Whatever regex syntax is left carries no search value; blank it out.
    expanded
        .chars()
        .map(|c| match c {
            '[' | ']' | '(' | ')' | '{' | '}' | '*' | '+' | '?' | '^' | '$' | '|' => ' ',
            other => other,
        })
        .collect()
}
134
/// Splits compound identifiers into their component words so each part is
/// searchable on its own.
///
/// For every whitespace-separated word in `text`:
///
/// * a word containing `_` contributes each non-empty `_`-separated part;
/// * otherwise, a word mixing upper- and lowercase letters is split before
///   every uppercase letter that follows a non-uppercase character, and the
///   pieces are lowercased (a run of capitals stays glued to what follows it,
///   e.g. `parseJSON` yields `parse` and `json`);
/// * in all cases the original word is emitted afterwards, unchanged.
///
/// Every emitted token is followed by a single space, so a non-empty result
/// always ends with a space.
fn expand_identifiers(text: &str) -> String {
    let mut result = String::with_capacity(text.len() * 2);

    for word in text.split_whitespace() {
        if word.contains('_') {
            for part in word.split('_').filter(|part| !part.is_empty()) {
                result.push_str(part);
                result.push(' ');
            }
        } else if word.chars().any(char::is_uppercase) && word.chars().any(char::is_lowercase) {
            let mut prev_was_upper = false;
            let mut current_word = String::new();

            for c in word.chars() {
                // A new piece starts at an uppercase letter that does not
                // continue a run of capitals.
                if c.is_uppercase() && !prev_was_upper && !current_word.is_empty() {
                    result.push_str(&current_word.to_lowercase());
                    result.push(' ');
                    current_word.clear();
                }
                current_word.push(c);
                prev_was_upper = c.is_uppercase();
            }
            if !current_word.is_empty() {
                result.push_str(&current_word.to_lowercase());
                result.push(' ');
            }
        }

        // The untouched word is always kept so exact-identifier queries match.
        result.push_str(word);
        result.push(' ');
    }

    result
}
181
/// Discovery document describing one server and the tools it exposes.
///
/// This is the payload accepted by `DiscoveryRegistry::register`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct DiscoverySpec {
    /// Optional JSON Schema reference. (De)serialized under the `$schema` key
    /// and omitted from output when `None`.
    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
    pub schema: Option<String>,

    /// Identity of the server providing the tools. Required.
    pub server: ServerInfo,

    /// Tools exposed by the server. Required (may be empty).
    pub tools: Vec<ToolSpec>,

    /// Category metadata, presumably keyed by category name. Defaults to an
    /// empty map when absent and is omitted from output when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub categories: HashMap<String, CategoryInfo>,
}
200
/// Identifying information about a server.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ServerInfo {
    /// Server name. The registry uses it as the registration key and as the
    /// prefix of every tool id.
    pub name: String,

    /// Server version; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Server description; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
216
/// Description of a single tool exposed by a server.
///
/// Only `name` is required. Optional scalars are omitted from output when
/// `None`; list fields default to empty on input and are omitted from output
/// when empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ToolSpec {
    /// Tool name; combined with the server name to form the registry id.
    pub name: String,

    /// Alternative names for the tool.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,

    /// Category the tool belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub category: Option<String>,

    /// Subcategory within `category`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subcategory: Option<String>,

    /// Free-form tags.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Short summary.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,

    /// Full description.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Parameters accepted by the tool.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub params: Vec<ParamSpec>,

    /// Description of the tool's return value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub returns: Option<ReturnSpec>,

    /// Usage examples.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub examples: Vec<ExampleSpec>,

    /// Related tools (presumably by name or id — the registry does not
    /// interpret these values).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub related: Vec<String>,

    /// Version in which the tool was introduced (presumably; stored as an
    /// opaque string).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub since: Option<String>,

    /// Stability marker. The hand-written schema returned by
    /// `DiscoveryRegistry::get_schema` lists `stable`, `beta` and
    /// `deprecated`, but this type accepts any string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stability: Option<String>,
}
272
/// Description of a single tool parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ParamSpec {
    /// Parameter name. Required.
    pub name: String,

    /// Type name of the parameter; (de)serialized under the `type` key.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub param_type: Option<String>,

    /// Whether the parameter is required; `false` when absent from the input.
    #[serde(default)]
    pub required: bool,

    /// Human-readable description.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Enumerated values (presumably the allowed ones); (de)serialized under
    /// the `enum` key.
    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
    pub enum_values: Option<Vec<String>>,

    /// Default value, as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
}
300
/// Description of a tool's return value.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ReturnSpec {
    /// Type name of the returned value; (de)serialized under the `type` key.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub return_type: Option<String>,

    /// Human-readable description of the returned value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
313
/// A usage example attached to a tool. All fields are optional and omitted
/// from output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct ExampleSpec {
    /// What the example demonstrates. This text is fed into the search index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Example arguments, as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub args: Option<Value>,

    /// Example result, as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result: Option<Value>,
}
330
/// Metadata about a tool category declared in a [`DiscoverySpec`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub struct CategoryInfo {
    /// Human-readable description; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Subcategory names. Defaults to empty on input and is omitted from
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub subcategories: Vec<String>,
}
343
/// In-memory registry of discovery specs with a BM25 search index over the
/// tools they declare.
#[derive(Debug)]
pub struct DiscoveryRegistry {
    /// Registered specs, keyed by server name.
    servers: HashMap<String, DiscoverySpec>,

    /// Registered tools, keyed by `"<server name>:<tool name>"`. Each value is
    /// the owning server's name together with the tool's spec.
    tools: HashMap<String, (String, ToolSpec)>,

    /// Search index built from `tools`; `None` while no tools are registered.
    index: Option<Bm25Index>,
}
356
357impl Default for DiscoveryRegistry {
358 fn default() -> Self {
359 Self::new()
360 }
361}
362
363impl DiscoveryRegistry {
364 pub fn new() -> Self {
366 Self {
367 servers: HashMap::new(),
368 tools: HashMap::new(),
369 index: None,
370 }
371 }
372
373 pub fn register(&mut self, spec: DiscoverySpec, replace: bool) -> RegistrationResult {
375 let server_name = spec.server.name.clone();
376
377 if self.servers.contains_key(&server_name) && !replace {
379 return RegistrationResult {
380 ok: false,
381 tools_indexed: 0,
382 warnings: vec![format!(
383 "Server '{}' already registered. Use replace=true to update.",
384 server_name
385 )],
386 };
387 }
388
389 if replace {
391 self.tools.retain(|_, (srv, _)| srv != &server_name);
392 }
393
394 let mut warnings = Vec::new();
396 let mut tools_added = 0;
397
398 for tool in &spec.tools {
399 let tool_id = format!("{}:{}", server_name, tool.name);
400
401 if self.tools.contains_key(&tool_id) && !replace {
402 warnings.push(format!("Tool '{}' already exists, skipping", tool_id));
403 continue;
404 }
405
406 self.tools
407 .insert(tool_id, (server_name.clone(), tool.clone()));
408 tools_added += 1;
409 }
410
411 self.servers.insert(server_name, spec);
413
414 self.rebuild_index();
416
417 RegistrationResult {
418 ok: true,
419 tools_indexed: tools_added,
420 warnings,
421 }
422 }
423
424 pub fn unregister(&mut self, server_name: &str) -> bool {
426 if self.servers.remove(server_name).is_some() {
427 self.tools.retain(|_, (srv, _)| srv != server_name);
428 self.rebuild_index();
429 true
430 } else {
431 false
432 }
433 }
434
435 fn rebuild_index(&mut self) {
437 if self.tools.is_empty() {
438 self.index = None;
439 return;
440 }
441
442 let docs: Vec<Value> = self
444 .tools
445 .iter()
446 .map(|(id, (server, tool))| {
447 let summary = tool.summary.as_deref().unwrap_or("");
448 let description = tool.description.as_deref().unwrap_or("");
449
450 let expanded_summary = preprocess_for_search(summary);
452 let expanded_description = preprocess_for_search(description);
453
454 let examples_text: String = tool
456 .examples
457 .iter()
458 .filter_map(|ex| ex.description.as_ref())
459 .map(|d| preprocess_for_search(d))
460 .collect::<Vec<_>>()
461 .join(" ");
462
463 serde_json::json!({
464 "id": id,
465 "server": server,
466 "name": tool.name,
467 "aliases": tool.aliases.join(" "),
468 "category": tool.category.as_deref().unwrap_or(""),
469 "tags": tool.tags.join(" "),
470 "summary": summary,
471 "description": description,
472 "params": tool.params.iter().map(|p| p.name.as_str()).collect::<Vec<_>>().join(" "),
473 "expanded_summary": expanded_summary,
475 "expanded_description": expanded_description,
476 "expanded_examples": examples_text,
477 })
478 })
479 .collect();
480
481 let options = IndexOptions {
482 fields: vec![
483 "name".to_string(),
484 "aliases".to_string(),
485 "category".to_string(),
486 "tags".to_string(),
487 "summary".to_string(),
488 "description".to_string(),
489 "params".to_string(),
490 "expanded_summary".to_string(),
492 "expanded_description".to_string(),
493 "expanded_examples".to_string(),
494 ],
495 id_field: Some("id".to_string()),
496 stopwords: STOP_WORDS.iter().map(|s| s.to_string()).collect(),
497 ..Default::default()
498 };
499
500 self.index = Some(Bm25Index::build(&docs, options));
501 }
502
503 pub fn query(&self, query: &str, top_k: usize) -> Vec<ToolQueryResult> {
505 let Some(index) = &self.index else {
506 return Vec::new();
507 };
508
509 let results = index.search(query, top_k);
510
511 results
512 .into_iter()
513 .filter_map(|r| {
514 let (server, tool) = self.tools.get(&r.id)?;
515 Some(ToolQueryResult {
516 id: r.id,
517 server: server.clone(),
518 tool: tool.clone(),
519 score: r.score,
520 matches: r.matches,
521 })
522 })
523 .collect()
524 }
525
526 pub fn similar(&self, tool_id: &str, top_k: usize) -> Vec<ToolQueryResult> {
528 let Some(index) = &self.index else {
529 return Vec::new();
530 };
531
532 let results = index.similar(tool_id, top_k);
533
534 results
535 .into_iter()
536 .filter_map(|r| {
537 let (server, tool) = self.tools.get(&r.id)?;
538 Some(ToolQueryResult {
539 id: r.id,
540 server: server.clone(),
541 tool: tool.clone(),
542 score: r.score,
543 matches: r.matches,
544 })
545 })
546 .collect()
547 }
548
549 pub fn list_servers(&self) -> Vec<ServerSummary> {
551 self.servers
552 .iter()
553 .map(|(name, spec)| ServerSummary {
554 name: name.clone(),
555 version: spec.server.version.clone(),
556 description: spec.server.description.clone(),
557 tool_count: spec.tools.len(),
558 })
559 .collect()
560 }
561
562 pub fn list_categories(&self) -> HashMap<String, CategorySummary> {
564 let mut categories: HashMap<String, CategorySummary> = HashMap::new();
565
566 for (server, tool) in self.tools.values() {
567 if let Some(cat) = &tool.category {
568 let entry = categories.entry(cat.clone()).or_insert(CategorySummary {
569 name: cat.clone(),
570 tool_count: 0,
571 servers: Vec::new(),
572 subcategories: Vec::new(),
573 });
574 entry.tool_count += 1;
575 if !entry.servers.contains(server) {
576 entry.servers.push(server.clone());
577 }
578 if let Some(subcat) = tool
579 .subcategory
580 .as_ref()
581 .filter(|s| !entry.subcategories.contains(s))
582 {
583 entry.subcategories.push(subcat.clone());
584 }
585 }
586 }
587
588 categories
589 }
590
591 pub fn index_stats(&self) -> Option<IndexStats> {
593 let index = self.index.as_ref()?;
594
595 Some(IndexStats {
596 doc_count: index.doc_count,
597 term_count: index.terms.len(),
598 avg_doc_length: index.avg_doc_length,
599 server_count: self.servers.len(),
600 top_terms: index.terms().into_iter().take(20).collect(),
601 })
602 }
603
604 pub fn get_schema() -> Value {
606 serde_json::json!({
607 "$schema": "http://json-schema.org/draft-07/schema#",
608 "$id": "https://jpx.dev/schemas/mcp-discovery/v1.json",
609 "title": "MCP Discovery Spec",
610 "description": "Schema for registering MCP server capabilities with jpx",
611 "type": "object",
612 "required": ["server", "tools"],
613 "properties": {
614 "$schema": {
615 "type": "string",
616 "description": "JSON Schema reference"
617 },
618 "server": {
619 "type": "object",
620 "required": ["name"],
621 "properties": {
622 "name": {"type": "string", "description": "Server name"},
623 "version": {"type": "string", "description": "Server version"},
624 "description": {"type": "string", "description": "Server description"}
625 }
626 },
627 "tools": {
628 "type": "array",
629 "items": {
630 "type": "object",
631 "required": ["name"],
632 "properties": {
633 "name": {"type": "string", "description": "Tool name"},
634 "aliases": {"type": "array", "items": {"type": "string"}},
635 "category": {"type": "string"},
636 "subcategory": {"type": "string"},
637 "tags": {"type": "array", "items": {"type": "string"}},
638 "summary": {"type": "string", "description": "Short summary"},
639 "description": {"type": "string", "description": "Full description"},
640 "params": {
641 "type": "array",
642 "items": {
643 "type": "object",
644 "required": ["name"],
645 "properties": {
646 "name": {"type": "string"},
647 "type": {"type": "string"},
648 "required": {"type": "boolean"},
649 "description": {"type": "string"},
650 "enum": {"type": "array", "items": {"type": "string"}},
651 "default": {}
652 }
653 }
654 },
655 "returns": {
656 "type": "object",
657 "properties": {
658 "type": {"type": "string"},
659 "description": {"type": "string"}
660 }
661 },
662 "examples": {
663 "type": "array",
664 "items": {
665 "type": "object",
666 "properties": {
667 "description": {"type": "string"},
668 "args": {},
669 "result": {}
670 }
671 }
672 },
673 "related": {"type": "array", "items": {"type": "string"}},
674 "since": {"type": "string"},
675 "stability": {"type": "string", "enum": ["stable", "beta", "deprecated"]}
676 }
677 }
678 },
679 "categories": {
680 "type": "object",
681 "additionalProperties": {
682 "type": "object",
683 "properties": {
684 "description": {"type": "string"},
685 "subcategories": {"type": "array", "items": {"type": "string"}}
686 }
687 }
688 }
689 }
690 })
691 }
692}
693
/// Outcome of a `DiscoveryRegistry::register` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegistrationResult {
    /// `false` when the registration was rejected because the server was
    /// already registered and replacement was not requested.
    pub ok: bool,
    /// Number of tools added to the registry by this call.
    pub tools_indexed: usize,
    /// Human-readable notes about rejected registrations or skipped tools.
    pub warnings: Vec<String>,
}
701
/// One hit returned by `DiscoveryRegistry::query` or
/// `DiscoveryRegistry::similar`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolQueryResult {
    /// Registry id of the tool, `"<server name>:<tool name>"`.
    pub id: String,
    /// Name of the server that registered the tool.
    pub server: String,
    /// Full spec of the matched tool.
    pub tool: ToolSpec,
    /// Score reported by the index for this hit.
    pub score: f64,
    /// Match details reported by the index, copied through unchanged.
    /// NOTE(review): presumably field name -> matched terms; confirm against
    /// `Bm25Index`.
    pub matches: HashMap<String, Vec<String>>,
}
711
/// Summary of a registered server, as returned by
/// `DiscoveryRegistry::list_servers`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerSummary {
    /// Server name (the registration key).
    pub name: String,
    /// Server version, if the spec declared one.
    pub version: Option<String>,
    /// Server description, if the spec declared one.
    pub description: Option<String>,
    /// Number of tools declared in the server's spec.
    pub tool_count: usize,
}
720
/// Per-category aggregate, as returned by
/// `DiscoveryRegistry::list_categories`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorySummary {
    /// Category name.
    pub name: String,
    /// Number of registered tools in this category.
    pub tool_count: usize,
    /// Distinct servers that contribute at least one tool to the category.
    pub servers: Vec<String>,
    /// Distinct subcategories seen on tools in the category.
    pub subcategories: Vec<String>,
}
729
/// Statistics about the search index, as returned by
/// `DiscoveryRegistry::index_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStats {
    /// Number of documents in the index, as reported by the index.
    pub doc_count: usize,
    /// Number of entries in the index's term table.
    pub term_count: usize,
    /// Average document length, as reported by the index.
    pub avg_doc_length: f64,
    /// Number of servers currently registered.
    pub server_count: usize,
    /// Up to 20 `(term, count)` pairs taken from the index's term listing.
    /// NOTE(review): the meaning of the count and the ordering come from
    /// `Bm25Index::terms` — confirm there.
    pub top_terms: Vec<(String, usize)>,
}
739
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a spec for a server named `redisctl` with three tools: two in
    /// the `clusters` category and one in `backups`.
    fn sample_spec() -> DiscoverySpec {
        serde_json::from_value(serde_json::json!({
            "server": {
                "name": "redisctl",
                "version": "0.5.0",
                "description": "Redis Enterprise management"
            },
            "tools": [
                {
                    "name": "create_cluster",
                    "category": "clusters",
                    "tags": ["write", "provisioning"],
                    "summary": "Create a new Redis cluster",
                    "description": "Creates a new Redis Enterprise cluster with specified configuration"
                },
                {
                    "name": "delete_cluster",
                    "category": "clusters",
                    "tags": ["write", "destructive"],
                    "summary": "Delete a cluster",
                    "description": "Permanently deletes a Redis cluster"
                },
                {
                    "name": "list_backups",
                    "category": "backups",
                    "tags": ["read"],
                    "summary": "List all backups",
                    "description": "Lists all available backups for a cluster"
                }
            ]
        }))
        .unwrap()
    }

    #[test]
    fn test_register_spec() {
        let mut registry = DiscoveryRegistry::new();
        let spec = sample_spec();

        let result = registry.register(spec, false);

        assert!(result.ok);
        assert_eq!(result.tools_indexed, 3);
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_query_tools() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        let results = registry.query("cluster", 10);

        assert!(!results.is_empty());
        // Both cluster tools should rank ahead of the backup tool.
        let top_names: Vec<_> = results
            .iter()
            .take(2)
            .map(|r| r.tool.name.as_str())
            .collect();
        assert!(top_names.contains(&"create_cluster"));
        assert!(top_names.contains(&"delete_cluster"));
    }

    #[test]
    fn test_query_by_tag() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        let results = registry.query("read", 10);

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].tool.name, "list_backups");
    }

    #[test]
    fn test_list_servers() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        let servers = registry.list_servers();

        assert_eq!(servers.len(), 1);
        assert_eq!(servers[0].name, "redisctl");
        assert_eq!(servers[0].tool_count, 3);
    }

    #[test]
    fn test_list_categories() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        let categories = registry.list_categories();

        assert_eq!(categories.len(), 2);
        assert!(categories.contains_key("clusters"));
        assert!(categories.contains_key("backups"));
        assert_eq!(categories.get("clusters").unwrap().tool_count, 2);
    }

    #[test]
    fn test_unregister() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        assert!(registry.unregister("redisctl"));
        assert!(registry.list_servers().is_empty());
        assert!(registry.query("cluster", 10).is_empty());
    }

    #[test]
    fn test_replace_registration() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        // Re-registering without `replace` is rejected.
        let result = registry.register(sample_spec(), false);
        assert!(!result.ok);

        // With `replace` it succeeds.
        let result = registry.register(sample_spec(), true);
        assert!(result.ok);
    }

    #[test]
    fn test_similar_tools() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        let similar = registry.similar("redisctl:create_cluster", 10);

        assert!(!similar.is_empty());
        // The other cluster tool is the closest neighbour.
        assert_eq!(similar[0].tool.name, "delete_cluster");
    }

    #[test]
    fn test_minimal_spec() {
        let minimal: DiscoverySpec = serde_json::from_value(serde_json::json!({
            "server": {"name": "minimal"},
            "tools": [{"name": "foo"}]
        }))
        .unwrap();

        let mut registry = DiscoveryRegistry::new();
        let result = registry.register(minimal, false);

        assert!(result.ok);
        assert_eq!(result.tools_indexed, 1);
    }

    #[test]
    fn test_get_schema() {
        let schema = DiscoveryRegistry::get_schema();

        assert!(schema.get("$schema").is_some());
        assert!(schema.get("properties").is_some());
    }

    #[test]
    fn test_index_stats() {
        let mut registry = DiscoveryRegistry::new();
        registry.register(sample_spec(), false);

        let stats = registry.index_stats().unwrap();

        assert_eq!(stats.doc_count, 3);
        assert_eq!(stats.server_count, 1);
        assert!(stats.term_count > 0);
    }

    #[test]
    fn test_strip_jmespath_literals() {
        // Each assertion targets a word that only the expansion can produce;
        // "newline", "digit" and "tab" already occur in the inputs themselves.
        let result = strip_jmespath_literals(r#"split text on `"\n"` newlines"#);
        assert!(result.contains("linebreak"));

        let result = strip_jmespath_literals(r#"match `"\\d+"` digits"#);
        assert!(result.contains("numeric"));

        let result = strip_jmespath_literals(r#"use `"\t"` for tabs and `"\n"` for lines"#);
        assert!(result.contains(" tab "));
        assert!(result.contains("newline"));

        // Non-string literals keep their content but lose the backticks.
        let result = strip_jmespath_literals(r#"literal `123` number"#);
        assert!(result.contains("123"));
        assert!(!result.contains('`'));
    }

    #[test]
    fn test_expand_escape_sequences() {
        assert!(expand_escape_sequences(r"\n").contains("newline"));
        assert!(expand_escape_sequences(r"\t").contains("tab"));
        assert!(expand_escape_sequences(r"\d").contains("digit"));
        assert!(expand_escape_sequences(r"\w").contains("word"));
        assert!(expand_escape_sequences(r"\s").contains("whitespace"));
    }

    #[test]
    fn test_expand_regex_patterns() {
        assert!(expand_regex_patterns(r"\d+").contains("digits"));
        assert!(expand_regex_patterns(r"\w+").contains("words"));
        assert!(expand_regex_patterns(r"[0-9]").contains("digit"));
        assert!(expand_regex_patterns(r"[a-zA-Z]").contains("letter"));
        assert!(expand_regex_patterns(r".*").contains("any"));

        // Leftover regex metacharacters are stripped.
        let result = expand_regex_patterns(r"foo[bar]+baz");
        assert!(!result.contains('['));
        assert!(!result.contains(']'));
        assert!(!result.contains('+'));
    }

    #[test]
    fn test_expand_identifiers() {
        let result = expand_identifiers("get_user_info");
        assert!(result.contains("get"));
        assert!(result.contains("user"));
        assert!(result.contains("info"));
        assert!(result.contains("get_user_info"));

        let result = expand_identifiers("getUserInfo");
        assert!(result.contains("get"));
        assert!(result.contains("user"));
        assert!(result.contains("info"));
        assert!(result.contains("getUserInfo"));

        let result = expand_identifiers("simple");
        assert!(result.contains("simple"));
    }

    #[test]
    fn test_preprocess_for_search_integration() {
        let input = r#"Split on `"\n"` to get lines, use regex_extract for \d+ numbers"#;
        let result = preprocess_for_search(input);

        // Compare whole tokens so words already present in the input
        // ("numbers", "regex_extract") cannot satisfy the checks by accident.
        let tokens: Vec<&str> = result.split_whitespace().collect();

        assert!(tokens.contains(&"newline") || tokens.contains(&"linebreak"));
        assert!(tokens.contains(&"digits") || tokens.contains(&"numeric"));
        assert!(tokens.contains(&"regex"));
        assert!(tokens.contains(&"extract"));

        // Whitespace is normalized: no run of two or more spaces survives.
        assert!(!result.contains("  "));
    }

    #[test]
    fn test_preprocess_preserves_search_terms() {
        let input = "Create a new database connection";
        let result = preprocess_for_search(input);

        assert!(result.contains("Create"));
        assert!(result.contains("database"));
        assert!(result.contains("connection"));
    }

    #[test]
    fn test_search_with_preprocessed_content() {
        let spec: DiscoverySpec = serde_json::from_value(serde_json::json!({
            "server": {"name": "text-tools"},
            "tools": [
                {
                    "name": "split_lines",
                    "summary": r#"Split text on newlines using `"\n"` delimiter"#,
                    "description": r#"Splits input string on newline characters. Use split(@, `"\n"`) syntax."#
                },
                {
                    "name": "extract_numbers",
                    "summary": r#"Extract numeric patterns with regex `"\\d+"`"#,
                    "description": r#"Uses regex_extract to find all \d+ digit sequences in text."#
                }
            ]
        }))
        .unwrap();

        let mut registry = DiscoveryRegistry::new();
        registry.register(spec, false);

        let results = registry.query("newline", 10);
        assert!(!results.is_empty());
        assert_eq!(results[0].tool.name, "split_lines");

        let results = registry.query("digit", 10);
        assert!(!results.is_empty());
        assert_eq!(results[0].tool.name, "extract_numbers");
    }
}