1use crate::bm25::{Bm25Index, IndexOptions};
20use serde::{Deserialize, Serialize};
21use serde_json::Value;
22use std::collections::HashMap;
23
/// Common English function words handed to the BM25 index as its stop-word list.
///
/// The list is converted to owned strings and passed as `IndexOptions::stopwords`
/// every time the index is rebuilt.
const STOP_WORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "he", "in", "is", "it",
    "its", "of", "on", "or", "that", "the", "to", "was", "were", "will", "with", "this", "but",
    "they", "have", "had", "what", "when", "where", "who", "which", "why", "how", "all", "each",
    "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
    "own", "same", "so", "than", "too", "very", "just", "can", "could", "should", "would", "may",
    "might", "must", "shall", "about", "above", "after", "again", "against", "below", "between",
    "into", "through", "during", "before", "under", "over",
];
35
36fn preprocess_for_search(text: &str) -> String {
44 let mut result = text.to_string();
45
46 result = strip_jmespath_literals(&result);
49
50 result = expand_regex_patterns(&result);
52
53 result = expand_identifiers(&result);
55
56 result.split_whitespace().collect::<Vec<_>>().join(" ")
58}
59
/// Unwraps backtick-delimited literals so their contents become plain, searchable text.
///
/// Text outside backticks is copied through unchanged. For each backtick span the
/// inner text is trimmed and emitted surrounded by single spaces:
///
/// * if it is wrapped in a pair of double quotes, the quotes are removed and the
///   content is run through [`expand_escape_sequences`];
/// * otherwise (numbers, bare words, a lone `"`, an empty literal) it is emitted as is.
///
/// An unterminated backtick treats the remainder of the input as the literal.
fn strip_jmespath_literals(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut chars = text.chars();

    while let Some(c) = chars.next() {
        if c != '`' {
            result.push(c);
            continue;
        }

        // Consume everything up to (and including) the closing backtick.
        let inner: String = chars.by_ref().take_while(|&ch| ch != '`').collect();
        let trimmed = inner.trim();

        result.push(' ');
        // `strip_prefix` followed by `strip_suffix` only succeeds when there are two
        // distinct quote characters, so a literal consisting of a single `"` falls
        // through to the verbatim branch instead of producing an inverted slice range.
        match trimmed
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
        {
            Some(content) => result.push_str(&expand_escape_sequences(content)),
            None => result.push_str(trimmed),
        }
        result.push(' ');
    }

    result
}

/// Replaces backslash escape sequences with descriptive English words.
///
/// `text` is expected to contain the escapes in their textual two-character form
/// (a backslash followed by a letter), not the control characters themselves.
/// Doubled backslashes are handled last, so a sequence such as `\\d` still yields
/// the "digit" expansion; any doubled backslashes that remain become a space.
fn expand_escape_sequences(text: &str) -> String {
    text.replace("\\n", " newline linebreak ")
        .replace("\\r", " return ")
        .replace("\\t", " tab ")
        .replace("\\s", " whitespace space ")
        .replace("\\d", " digit number numeric ")
        .replace("\\w", " word alphanumeric ")
        .replace("\\b", " boundary ")
        .replace("\\\\", " ")
}
109
/// Rewrites common regex fragments into descriptive words and strips regex metacharacters.
///
/// The phrase substitutions are applied one after another in the listed order,
/// each on the output of the previous one. Afterwards every remaining bracket,
/// quantifier, anchor or alternation character is replaced by a single space.
fn expand_regex_patterns(text: &str) -> String {
    // (regex fragment, replacement words) — order matters and mirrors the pipeline.
    const PHRASES: [(&str, &str); 11] = [
        ("[0-9]", " digit number "),
        ("[a-z]", " letter lowercase "),
        ("[A-Z]", " letter uppercase "),
        ("[a-zA-Z]", " letter alphabetic "),
        ("[^>]", " "),
        (".*", " any anything "),
        (".+", " one more any "),
        ("\\d+", " digits numbers numeric "),
        ("\\w+", " words alphanumeric "),
        ("\\s+", " whitespace spaces "),
        ("\\S+", " nonwhitespace "),
    ];
    // Leftover syntax characters that carry no search value.
    const METACHARS: [char; 12] = ['[', ']', '(', ')', '{', '}', '*', '+', '?', '^', '$', '|'];

    let mut expanded = text.to_owned();
    for &(fragment, words) in PHRASES.iter() {
        expanded = expanded.replace(fragment, words);
    }
    expanded.replace(METACHARS, " ")
}
131
/// Splits compound identifiers into their component words while keeping the original.
///
/// Each whitespace-separated token is handled independently:
///
/// * a token containing `_` emits its non-empty underscore-separated pieces;
/// * otherwise, a token mixing upper- and lowercase letters emits its lowercased
///   segments, where a new segment starts at an uppercase letter that follows a
///   non-uppercase one (so a run of capitals stays attached to what follows it);
/// * any other token emits nothing extra.
///
/// In every case the untouched token is emitted last. Every emitted word is
/// followed by one space, so non-empty output ends with a trailing space.
fn expand_identifiers(text: &str) -> String {
    let mut out = String::with_capacity(text.len() * 2);

    for token in text.split_whitespace() {
        let is_snake = token.contains('_');
        let is_mixed_case = !is_snake
            && token.chars().any(char::is_uppercase)
            && token.chars().any(char::is_lowercase);

        if is_snake {
            for piece in token.split('_').filter(|piece| !piece.is_empty()) {
                out.push_str(piece);
                out.push(' ');
            }
        } else if is_mixed_case {
            let mut segment = String::new();
            let mut previous_upper = false;

            for ch in token.chars() {
                let upper = ch.is_uppercase();
                // A lower→upper transition closes the segment collected so far.
                if upper && !previous_upper && !segment.is_empty() {
                    out.push_str(&segment.to_lowercase());
                    out.push(' ');
                    segment.clear();
                }
                segment.push(ch);
                previous_upper = upper;
            }
            if !segment.is_empty() {
                out.push_str(&segment.to_lowercase());
                out.push(' ');
            }
        }

        // The original token is always kept so exact-name queries still match.
        out.push_str(token);
        out.push(' ');
    }

    out
}
178
/// Top-level discovery document describing one server and the tools it offers.
///
/// This is the value accepted by `DiscoveryRegistry::register`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoverySpec {
    /// Optional JSON Schema reference; (de)serialized under the `$schema` key
    /// and omitted from output when absent.
    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
    pub schema: Option<String>,

    /// Identity of the server publishing the tools.
    pub server: ServerInfo,

    /// Tools exposed by the server.
    pub tools: Vec<ToolSpec>,

    /// Optional category metadata; defaults to empty and is omitted from output
    /// when empty. Keys are presumably category names — confirm against producers.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub categories: HashMap<String, CategoryInfo>,
}
196
/// Identifying information about a server.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerInfo {
    /// Server name; the registry uses it as the server key and as the prefix
    /// of every tool id.
    pub name: String,

    /// Server version, omitted from output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Server description, omitted from output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
211
/// Description of a single tool offered by a server.
///
/// Only `name` is required; list fields default to empty and optional fields
/// are omitted from serialized output when unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolSpec {
    /// Tool name; combined with the server name as `server:name` to form the registry id.
    pub name: String,

    /// Alternative names, indexed for search.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,

    /// Category the tool belongs to; indexed for search and used to group tools.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub category: Option<String>,

    /// Subcategory within `category`; collected per category when listing categories.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subcategory: Option<String>,

    /// Free-form tags, indexed for search.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Short summary; indexed both verbatim and in preprocessed form.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub summary: Option<String>,

    /// Full description; indexed both verbatim and in preprocessed form.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Parameters accepted by the tool; only their names are indexed.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub params: Vec<ParamSpec>,

    /// Description of the return value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub returns: Option<ReturnSpec>,

    /// Usage examples; only their descriptions are indexed (after preprocessing).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub examples: Vec<ExampleSpec>,

    /// Related tools; stored but not read by the registry in this file.
    /// Presumably tool names or ids — confirm with spec authors.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub related: Vec<String>,

    /// Presumably the version that introduced the tool — confirm; not read by the registry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub since: Option<String>,

    /// Stability label. The published JSON schema lists `stable`, `beta` and
    /// `deprecated`, but any string deserializes here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stability: Option<String>,
}
266
/// Description of one tool parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamSpec {
    /// Parameter name; the only parameter field added to the search index.
    pub name: String,

    /// Parameter type, (de)serialized under the `type` key.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub param_type: Option<String>,

    /// Whether the parameter is required; `false` when missing from the input.
    #[serde(default)]
    pub required: bool,

    /// Human-readable description of the parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Allowed values, (de)serialized under the `enum` key.
    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
    pub enum_values: Option<Vec<String>>,

    /// Default value as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
}
293
/// Description of a tool's return value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReturnSpec {
    /// Return type, (de)serialized under the `type` key.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub return_type: Option<String>,

    /// Human-readable description of the returned value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
}
305
/// A usage example attached to a tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExampleSpec {
    /// What the example demonstrates; this text is preprocessed and indexed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Example arguments as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub args: Option<Value>,

    /// Example result as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result: Option<Value>,
}
321
/// Optional metadata describing a tool category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoryInfo {
    /// Human-readable description of the category.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Declared subcategories; defaults to empty and is omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub subcategories: Vec<String>,
}
333
/// In-memory registry of discovery specs with a BM25 search index over their tools.
#[derive(Debug)]
pub struct DiscoveryRegistry {
    /// Registered specs keyed by server name.
    servers: HashMap<String, DiscoverySpec>,

    /// Every registered tool keyed by its `server:tool` id; the value holds the
    /// owning server name alongside the tool spec.
    tools: HashMap<String, (String, ToolSpec)>,

    /// Search index over `tools`; rebuilt after each successful register/unregister
    /// and `None` while no tools are registered.
    index: Option<Bm25Index>,
}
346
347impl Default for DiscoveryRegistry {
348 fn default() -> Self {
349 Self::new()
350 }
351}
352
353impl DiscoveryRegistry {
354 pub fn new() -> Self {
356 Self {
357 servers: HashMap::new(),
358 tools: HashMap::new(),
359 index: None,
360 }
361 }
362
363 pub fn register(&mut self, spec: DiscoverySpec, replace: bool) -> RegistrationResult {
365 let server_name = spec.server.name.clone();
366
367 if self.servers.contains_key(&server_name) && !replace {
369 return RegistrationResult {
370 ok: false,
371 tools_indexed: 0,
372 warnings: vec![format!(
373 "Server '{}' already registered. Use replace=true to update.",
374 server_name
375 )],
376 };
377 }
378
379 if replace {
381 self.tools.retain(|_, (srv, _)| srv != &server_name);
382 }
383
384 let mut warnings = Vec::new();
386 let mut tools_added = 0;
387
388 for tool in &spec.tools {
389 let tool_id = format!("{}:{}", server_name, tool.name);
390
391 if self.tools.contains_key(&tool_id) && !replace {
392 warnings.push(format!("Tool '{}' already exists, skipping", tool_id));
393 continue;
394 }
395
396 self.tools
397 .insert(tool_id, (server_name.clone(), tool.clone()));
398 tools_added += 1;
399 }
400
401 self.servers.insert(server_name, spec);
403
404 self.rebuild_index();
406
407 RegistrationResult {
408 ok: true,
409 tools_indexed: tools_added,
410 warnings,
411 }
412 }
413
414 pub fn unregister(&mut self, server_name: &str) -> bool {
416 if self.servers.remove(server_name).is_some() {
417 self.tools.retain(|_, (srv, _)| srv != server_name);
418 self.rebuild_index();
419 true
420 } else {
421 false
422 }
423 }
424
425 fn rebuild_index(&mut self) {
427 if self.tools.is_empty() {
428 self.index = None;
429 return;
430 }
431
432 let docs: Vec<Value> = self
434 .tools
435 .iter()
436 .map(|(id, (server, tool))| {
437 let summary = tool.summary.as_deref().unwrap_or("");
438 let description = tool.description.as_deref().unwrap_or("");
439
440 let expanded_summary = preprocess_for_search(summary);
442 let expanded_description = preprocess_for_search(description);
443
444 let examples_text: String = tool
446 .examples
447 .iter()
448 .filter_map(|ex| ex.description.as_ref())
449 .map(|d| preprocess_for_search(d))
450 .collect::<Vec<_>>()
451 .join(" ");
452
453 serde_json::json!({
454 "id": id,
455 "server": server,
456 "name": tool.name,
457 "aliases": tool.aliases.join(" "),
458 "category": tool.category.as_deref().unwrap_or(""),
459 "tags": tool.tags.join(" "),
460 "summary": summary,
461 "description": description,
462 "params": tool.params.iter().map(|p| p.name.as_str()).collect::<Vec<_>>().join(" "),
463 "expanded_summary": expanded_summary,
465 "expanded_description": expanded_description,
466 "expanded_examples": examples_text,
467 })
468 })
469 .collect();
470
471 let options = IndexOptions {
472 fields: vec![
473 "name".to_string(),
474 "aliases".to_string(),
475 "category".to_string(),
476 "tags".to_string(),
477 "summary".to_string(),
478 "description".to_string(),
479 "params".to_string(),
480 "expanded_summary".to_string(),
482 "expanded_description".to_string(),
483 "expanded_examples".to_string(),
484 ],
485 id_field: Some("id".to_string()),
486 stopwords: STOP_WORDS.iter().map(|s| s.to_string()).collect(),
487 ..Default::default()
488 };
489
490 self.index = Some(Bm25Index::build(&docs, options));
491 }
492
493 pub fn query(&self, query: &str, top_k: usize) -> Vec<ToolQueryResult> {
495 let Some(index) = &self.index else {
496 return Vec::new();
497 };
498
499 let results = index.search(query, top_k);
500
501 results
502 .into_iter()
503 .filter_map(|r| {
504 let (server, tool) = self.tools.get(&r.id)?;
505 Some(ToolQueryResult {
506 id: r.id,
507 server: server.clone(),
508 tool: tool.clone(),
509 score: r.score,
510 matches: r.matches,
511 })
512 })
513 .collect()
514 }
515
516 pub fn similar(&self, tool_id: &str, top_k: usize) -> Vec<ToolQueryResult> {
518 let Some(index) = &self.index else {
519 return Vec::new();
520 };
521
522 let results = index.similar(tool_id, top_k);
523
524 results
525 .into_iter()
526 .filter_map(|r| {
527 let (server, tool) = self.tools.get(&r.id)?;
528 Some(ToolQueryResult {
529 id: r.id,
530 server: server.clone(),
531 tool: tool.clone(),
532 score: r.score,
533 matches: r.matches,
534 })
535 })
536 .collect()
537 }
538
539 pub fn list_servers(&self) -> Vec<ServerSummary> {
541 self.servers
542 .iter()
543 .map(|(name, spec)| ServerSummary {
544 name: name.clone(),
545 version: spec.server.version.clone(),
546 description: spec.server.description.clone(),
547 tool_count: spec.tools.len(),
548 })
549 .collect()
550 }
551
552 pub fn list_categories(&self) -> HashMap<String, CategorySummary> {
554 let mut categories: HashMap<String, CategorySummary> = HashMap::new();
555
556 for (server, tool) in self.tools.values() {
557 if let Some(cat) = &tool.category {
558 let entry = categories.entry(cat.clone()).or_insert(CategorySummary {
559 name: cat.clone(),
560 tool_count: 0,
561 servers: Vec::new(),
562 subcategories: Vec::new(),
563 });
564 entry.tool_count += 1;
565 if !entry.servers.contains(server) {
566 entry.servers.push(server.clone());
567 }
568 if let Some(subcat) = tool
569 .subcategory
570 .as_ref()
571 .filter(|s| !entry.subcategories.contains(s))
572 {
573 entry.subcategories.push(subcat.clone());
574 }
575 }
576 }
577
578 categories
579 }
580
581 pub fn index_stats(&self) -> Option<IndexStats> {
583 let index = self.index.as_ref()?;
584
585 Some(IndexStats {
586 doc_count: index.doc_count,
587 term_count: index.terms.len(),
588 avg_doc_length: index.avg_doc_length,
589 server_count: self.servers.len(),
590 top_terms: index.terms().into_iter().take(20).collect(),
591 })
592 }
593
594 pub fn get_schema() -> Value {
596 serde_json::json!({
597 "$schema": "http://json-schema.org/draft-07/schema#",
598 "$id": "https://jpx.dev/schemas/mcp-discovery/v1.json",
599 "title": "MCP Discovery Spec",
600 "description": "Schema for registering MCP server capabilities with jpx",
601 "type": "object",
602 "required": ["server", "tools"],
603 "properties": {
604 "$schema": {
605 "type": "string",
606 "description": "JSON Schema reference"
607 },
608 "server": {
609 "type": "object",
610 "required": ["name"],
611 "properties": {
612 "name": {"type": "string", "description": "Server name"},
613 "version": {"type": "string", "description": "Server version"},
614 "description": {"type": "string", "description": "Server description"}
615 }
616 },
617 "tools": {
618 "type": "array",
619 "items": {
620 "type": "object",
621 "required": ["name"],
622 "properties": {
623 "name": {"type": "string", "description": "Tool name"},
624 "aliases": {"type": "array", "items": {"type": "string"}},
625 "category": {"type": "string"},
626 "subcategory": {"type": "string"},
627 "tags": {"type": "array", "items": {"type": "string"}},
628 "summary": {"type": "string", "description": "Short summary"},
629 "description": {"type": "string", "description": "Full description"},
630 "params": {
631 "type": "array",
632 "items": {
633 "type": "object",
634 "required": ["name"],
635 "properties": {
636 "name": {"type": "string"},
637 "type": {"type": "string"},
638 "required": {"type": "boolean"},
639 "description": {"type": "string"},
640 "enum": {"type": "array", "items": {"type": "string"}},
641 "default": {}
642 }
643 }
644 },
645 "returns": {
646 "type": "object",
647 "properties": {
648 "type": {"type": "string"},
649 "description": {"type": "string"}
650 }
651 },
652 "examples": {
653 "type": "array",
654 "items": {
655 "type": "object",
656 "properties": {
657 "description": {"type": "string"},
658 "args": {},
659 "result": {}
660 }
661 }
662 },
663 "related": {"type": "array", "items": {"type": "string"}},
664 "since": {"type": "string"},
665 "stability": {"type": "string", "enum": ["stable", "beta", "deprecated"]}
666 }
667 }
668 },
669 "categories": {
670 "type": "object",
671 "additionalProperties": {
672 "type": "object",
673 "properties": {
674 "description": {"type": "string"},
675 "subcategories": {"type": "array", "items": {"type": "string"}}
676 }
677 }
678 }
679 }
680 })
681 }
682}
683
/// Outcome of a `DiscoveryRegistry::register` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegistrationResult {
    /// `false` when the server was already registered and replacement was not requested.
    pub ok: bool,
    /// Number of tools counted as added by this registration.
    pub tools_indexed: usize,
    /// Human-readable notes about rejected registrations or skipped tools.
    pub warnings: Vec<String>,
}
691
/// One hit returned by `DiscoveryRegistry::query` or `DiscoveryRegistry::similar`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolQueryResult {
    /// Tool id in `server:tool` form, as reported by the index.
    pub id: String,
    /// Name of the server that owns the tool.
    pub server: String,
    /// Copy of the matching tool's spec.
    pub tool: ToolSpec,
    /// Score reported by the index for this hit.
    pub score: f64,
    /// Match details copied from the index hit; presumably field name → matched
    /// terms — confirm against the `bm25` module.
    pub matches: HashMap<String, Vec<String>>,
}
701
/// Short description of a registered server, as returned by `list_servers`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerSummary {
    /// Server name (the registry key).
    pub name: String,
    /// Server version, if the spec declared one.
    pub version: Option<String>,
    /// Server description, if the spec declared one.
    pub description: Option<String>,
    /// Number of tools listed in the server's stored spec.
    pub tool_count: usize,
}
710
/// Aggregated view of one tool category, as returned by `list_categories`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorySummary {
    /// Category name.
    pub name: String,
    /// Number of registered tools in this category.
    pub tool_count: usize,
    /// Distinct servers that contribute tools to this category.
    pub servers: Vec<String>,
    /// Distinct subcategories used by tools in this category.
    pub subcategories: Vec<String>,
}
719
/// Statistics about the search index, as returned by `index_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStats {
    /// Document count reported by the index.
    pub doc_count: usize,
    /// Number of entries in the index's term table.
    pub term_count: usize,
    /// Average document length reported by the index.
    pub avg_doc_length: f64,
    /// Number of registered servers.
    pub server_count: usize,
    /// Up to 20 entries from the index's term listing; presumably
    /// (term, frequency) pairs — confirm against the `bm25` module.
    pub top_terms: Vec<(String, usize)>,
}
729
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a three-tool fixture spec for a server named `redisctl`:
    /// two tools in the `clusters` category and one in `backups`.
    fn sample_spec() -> DiscoverySpec {
        serde_json::from_value(serde_json::json!({
            "server": {
                "name": "redisctl",
                "version": "0.5.0",
                "description": "Redis Enterprise management"
            },
            "tools": [
                {
                    "name": "create_cluster",
                    "category": "clusters",
                    "tags": ["write", "provisioning"],
                    "summary": "Create a new Redis cluster",
                    "description": "Creates a new Redis Enterprise cluster with specified configuration"
                },
                {
                    "name": "delete_cluster",
                    "category": "clusters",
                    "tags": ["write", "destructive"],
                    "summary": "Delete a cluster",
                    "description": "Permanently deletes a Redis cluster"
                },
                {
                    "name": "list_backups",
                    "category": "backups",
                    "tags": ["read"],
                    "summary": "List all backups",
                    "description": "Lists all available backups for a cluster"
                }
            ]
        }))
        .unwrap()
    }

    /// Returns a registry with [`sample_spec`] already registered.
    fn sample_registry() -> DiscoveryRegistry {
        let mut registry = DiscoveryRegistry::new();
        let outcome = registry.register(sample_spec(), false);
        assert!(outcome.ok, "fixture registration must succeed");
        registry
    }

    #[test]
    fn test_register_spec() {
        let mut registry = DiscoveryRegistry::new();
        let spec = sample_spec();

        let result = registry.register(spec, false);

        assert!(result.ok);
        assert_eq!(result.tools_indexed, 3);
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_query_tools() {
        let registry = sample_registry();

        let results = registry.query("cluster", 10);

        assert!(!results.is_empty());
        // Both cluster tools should outrank the backup tool.
        let top_names: Vec<_> = results
            .iter()
            .take(2)
            .map(|r| r.tool.name.as_str())
            .collect();
        assert!(top_names.contains(&"create_cluster"));
        assert!(top_names.contains(&"delete_cluster"));
    }

    #[test]
    fn test_query_by_tag() {
        let registry = sample_registry();

        let results = registry.query("read", 10);

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].tool.name, "list_backups");
    }

    #[test]
    fn test_list_servers() {
        let registry = sample_registry();

        let servers = registry.list_servers();

        assert_eq!(servers.len(), 1);
        assert_eq!(servers[0].name, "redisctl");
        assert_eq!(servers[0].tool_count, 3);
    }

    #[test]
    fn test_list_categories() {
        let registry = sample_registry();

        let categories = registry.list_categories();

        assert_eq!(categories.len(), 2);
        assert!(categories.contains_key("clusters"));
        assert!(categories.contains_key("backups"));
        assert_eq!(categories.get("clusters").unwrap().tool_count, 2);
    }

    #[test]
    fn test_unregister() {
        let mut registry = sample_registry();

        assert!(registry.unregister("redisctl"));
        assert!(registry.list_servers().is_empty());
        assert!(registry.query("cluster", 10).is_empty());
    }

    #[test]
    fn test_replace_registration() {
        let mut registry = sample_registry();

        // A second registration without `replace` must be rejected.
        let result = registry.register(sample_spec(), false);
        assert!(!result.ok);

        // With `replace` the same spec is accepted.
        let result = registry.register(sample_spec(), true);
        assert!(result.ok);
    }

    #[test]
    fn test_similar_tools() {
        let registry = sample_registry();

        let similar = registry.similar("redisctl:create_cluster", 10);

        assert!(!similar.is_empty());
        // The other cluster tool is the closest neighbour.
        assert_eq!(similar[0].tool.name, "delete_cluster");
    }

    #[test]
    fn test_minimal_spec() {
        let minimal: DiscoverySpec = serde_json::from_value(serde_json::json!({
            "server": {"name": "minimal"},
            "tools": [{"name": "foo"}]
        }))
        .unwrap();

        let mut registry = DiscoveryRegistry::new();
        let result = registry.register(minimal, false);

        assert!(result.ok);
        assert_eq!(result.tools_indexed, 1);
    }

    #[test]
    fn test_get_schema() {
        let schema = DiscoveryRegistry::get_schema();

        assert!(schema.get("$schema").is_some());
        assert!(schema.get("properties").is_some());
    }

    #[test]
    fn test_index_stats() {
        let registry = sample_registry();

        let stats = registry.index_stats().unwrap();

        assert_eq!(stats.doc_count, 3);
        assert_eq!(stats.server_count, 1);
        assert!(stats.term_count > 0);
    }

    #[test]
    fn test_strip_jmespath_literals() {
        assert!(strip_jmespath_literals(r#"split text on `"\n"` newlines"#).contains("newline"));

        let result = strip_jmespath_literals(r#"match `"\\d+"` digits"#);
        assert!(result.contains("digit"));

        let result = strip_jmespath_literals(r#"use `"\t"` for tabs and `"\n"` for lines"#);
        assert!(result.contains("tab"));
        assert!(result.contains("newline"));

        // Non-string literals are kept verbatim.
        let result = strip_jmespath_literals(r#"literal `123` number"#);
        assert!(result.contains("123"));
    }

    #[test]
    fn test_expand_escape_sequences() {
        assert!(expand_escape_sequences(r"\n").contains("newline"));
        assert!(expand_escape_sequences(r"\t").contains("tab"));
        assert!(expand_escape_sequences(r"\d").contains("digit"));
        assert!(expand_escape_sequences(r"\w").contains("word"));
        assert!(expand_escape_sequences(r"\s").contains("whitespace"));
    }

    #[test]
    fn test_expand_regex_patterns() {
        assert!(expand_regex_patterns(r"\d+").contains("digits"));
        assert!(expand_regex_patterns(r"\w+").contains("words"));
        assert!(expand_regex_patterns(r"[0-9]").contains("digit"));
        assert!(expand_regex_patterns(r"[a-zA-Z]").contains("letter"));
        assert!(expand_regex_patterns(r".*").contains("any"));

        // Leftover metacharacters are stripped.
        let result = expand_regex_patterns(r"foo[bar]+baz");
        assert!(!result.contains('['));
        assert!(!result.contains(']'));
        assert!(!result.contains('+'));
    }

    #[test]
    fn test_expand_identifiers() {
        let result = expand_identifiers("get_user_info");
        assert!(result.contains("get"));
        assert!(result.contains("user"));
        assert!(result.contains("info"));
        assert!(result.contains("get_user_info"));

        let result = expand_identifiers("getUserInfo");
        assert!(result.contains("get"));
        assert!(result.contains("user"));
        assert!(result.contains("info"));
        assert!(result.contains("getUserInfo"));

        let result = expand_identifiers("simple");
        assert!(result.contains("simple"));
    }

    #[test]
    fn test_preprocess_for_search_integration() {
        let input = r#"Split on `"\n"` to get lines, use regex_extract for \d+ numbers"#;
        let result = preprocess_for_search(input);

        assert!(result.contains("newline") || result.contains("linebreak"));
        assert!(result.contains("digit") || result.contains("number"));
        assert!(result.contains("regex"));
        assert!(result.contains("extract"));

        // Whitespace must be collapsed: no run of two or more spaces may survive.
        // (Checking for a single space would reject every multi-word result.)
        assert!(!result.contains("  "));
    }

    #[test]
    fn test_preprocess_preserves_search_terms() {
        let input = "Create a new database connection";
        let result = preprocess_for_search(input);

        assert!(result.contains("Create"));
        assert!(result.contains("database"));
        assert!(result.contains("connection"));
    }

    #[test]
    fn test_search_with_preprocessed_content() {
        let spec: DiscoverySpec = serde_json::from_value(serde_json::json!({
            "server": {"name": "text-tools"},
            "tools": [
                {
                    "name": "split_lines",
                    "summary": r#"Split text on newlines using `"\n"` delimiter"#,
                    "description": r#"Splits input string on newline characters. Use split(@, `"\n"`) syntax."#
                },
                {
                    "name": "extract_numbers",
                    "summary": r#"Extract numeric patterns with regex `"\\d+"`"#,
                    "description": r#"Uses regex_extract to find all \d+ digit sequences in text."#
                }
            ]
        }))
        .unwrap();

        let mut registry = DiscoveryRegistry::new();
        registry.register(spec, false);

        // "newline" only exists in the index thanks to escape-sequence expansion.
        let results = registry.query("newline", 10);
        assert!(!results.is_empty());
        assert_eq!(results[0].tool.name, "split_lines");

        let results = registry.query("digit", 10);
        assert!(!results.is_empty());
        assert_eq!(results[0].tool.name, "extract_numbers");
    }
}