blz_core/
registry.rs

1use fuzzy_matcher::FuzzyMatcher;
2use fuzzy_matcher::skim::SkimMatcherV2;
3use serde::{Deserialize, Serialize};
4
5/// Registry entry representing a documented tool/package
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct RegistryEntry {
8    /// Display name of the tool/package
9    pub name: String,
10    /// Kebab-case identifier for the entry
11    pub slug: String,
12    /// Alternative names and common abbreviations
13    pub aliases: Vec<String>,
14    /// Brief description of the tool/package
15    pub description: String,
16    /// URL to the llms.txt documentation file
17    pub llms_url: String,
18}
19
20impl RegistryEntry {
21    /// Creates a new registry entry
22    #[must_use]
23    pub fn new(name: &str, slug: &str, description: &str, llms_url: &str) -> Self {
24        Self {
25            name: name.to_string(),
26            slug: slug.to_string(),
27            aliases: vec![slug.to_string()],
28            description: description.to_string(),
29            llms_url: llms_url.to_string(),
30        }
31    }
32
33    /// Sets the aliases for this registry entry
34    #[must_use]
35    pub fn with_aliases(mut self, aliases: &[&str]) -> Self {
36        self.aliases = aliases.iter().map(|s| (*s).to_string()).collect();
37        self
38    }
39}
40
41impl std::fmt::Display for RegistryEntry {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(f, "{} ({})\n   {}", self.name, self.slug, self.description)
44    }
45}
46
47/// Registry for looking up documentation sources
48pub struct Registry {
49    /// List of all registered documentation sources
50    entries: Vec<RegistryEntry>,
51}
52
53impl Registry {
54    /// Create a new registry with hardcoded entries
55    /// Creates a new registry with built-in entries
56    #[must_use]
57    pub fn new() -> Self {
58        let entries = vec![
59            RegistryEntry::new(
60                "Bun",
61                "bun",
62                "Fast all-in-one JavaScript runtime and package manager",
63                "https://bun.sh/docs/llms.txt",
64            )
65            .with_aliases(&["bun", "bunjs"]),
66            RegistryEntry::new(
67                "Node.js",
68                "node",
69                "JavaScript runtime built on Chrome's V8 JavaScript engine",
70                "https://nodejs.org/docs/llms.txt",
71            )
72            .with_aliases(&["node", "nodejs", "js"]),
73            RegistryEntry::new(
74                "Deno",
75                "deno",
76                "Modern runtime for JavaScript and TypeScript",
77                "https://docs.deno.com/llms.txt",
78            )
79            .with_aliases(&["deno"]),
80            RegistryEntry::new(
81                "React",
82                "react",
83                "JavaScript library for building user interfaces",
84                "https://react.dev/llms.txt",
85            )
86            .with_aliases(&["react", "reactjs"]),
87            RegistryEntry::new(
88                "Vue.js",
89                "vue",
90                "Progressive JavaScript framework for building UIs",
91                "https://vuejs.org/llms.txt",
92            )
93            .with_aliases(&["vue", "vuejs"]),
94            RegistryEntry::new(
95                "Next.js",
96                "nextjs",
97                "React framework for production with hybrid static & server rendering",
98                "https://nextjs.org/docs/llms.txt",
99            )
100            .with_aliases(&["nextjs", "next"]),
101            RegistryEntry::new(
102                "Claude Code",
103                "claude-code",
104                "Anthropic's AI coding assistant documentation",
105                "https://docs.anthropic.com/claude-code/llms.txt",
106            )
107            .with_aliases(&["claude-code", "claude"]),
108            RegistryEntry::new(
109                "Pydantic",
110                "pydantic",
111                "Data validation library using Python type hints",
112                "https://docs.pydantic.dev/llms.txt",
113            )
114            .with_aliases(&["pydantic"]),
115            RegistryEntry::new(
116                "Anthropic Claude API",
117                "anthropic",
118                "Claude API documentation and guides",
119                "https://docs.anthropic.com/llms.txt",
120            )
121            .with_aliases(&["anthropic", "claude-api"]),
122            RegistryEntry::new(
123                "OpenAI API",
124                "openai",
125                "OpenAI API documentation and guides",
126                "https://platform.openai.com/docs/llms.txt",
127            )
128            .with_aliases(&["openai", "gpt"]),
129        ];
130
131        Self { entries }
132    }
133
134    /// Create a new registry with custom entries
135    #[must_use]
136    pub const fn from_entries(entries: Vec<RegistryEntry>) -> Self {
137        Self { entries }
138    }
139
140    /// Searches the registry for matching entries using fuzzy matching
141    #[must_use]
142    pub fn search(&self, query: &str) -> Vec<RegistrySearchResult> {
143        let matcher = SkimMatcherV2::default();
144        let query = query.trim().to_lowercase();
145
146        let mut results = Vec::new();
147
148        for entry in &self.entries {
149            let mut max_score = 0;
150            let mut best_match_field = "name";
151
152            // Try matching against name
153            if let Some(score) = matcher.fuzzy_match(&entry.name.to_lowercase(), &query) {
154                if score > max_score {
155                    max_score = score;
156                    best_match_field = "name";
157                }
158            }
159
160            // Try matching against slug
161            if let Some(score) = matcher.fuzzy_match(&entry.slug.to_lowercase(), &query) {
162                if score > max_score {
163                    max_score = score;
164                    best_match_field = "slug";
165                }
166            }
167
168            // Try matching against aliases
169            for alias in &entry.aliases {
170                if let Some(score) = matcher.fuzzy_match(&alias.to_lowercase(), &query) {
171                    if score > max_score {
172                        max_score = score;
173                        best_match_field = "alias";
174                    }
175                }
176            }
177
178            // Try matching against description (lower weight)
179            if let Some(score) = matcher.fuzzy_match(&entry.description.to_lowercase(), &query) {
180                let description_score = score / 2; // Lower weight for description matches
181                if description_score > max_score {
182                    max_score = description_score;
183                    best_match_field = "description";
184                }
185            }
186
187            if max_score > 0 {
188                results.push(RegistrySearchResult {
189                    entry: entry.clone(),
190                    score: max_score,
191                    match_field: best_match_field.to_string(),
192                });
193            }
194        }
195
196        // Sort by score descending
197        results.sort_by(|a, b| b.score.cmp(&a.score));
198
199        results
200    }
201
202    /// Get all registry entries
203    /// Returns all entries in the registry
204    #[must_use]
205    pub fn all_entries(&self) -> &[RegistryEntry] {
206        &self.entries
207    }
208}
209
210impl Default for Registry {
211    fn default() -> Self {
212        Self::new()
213    }
214}
215
216/// Search result from registry
217#[derive(Debug, Clone)]
218pub struct RegistrySearchResult {
219    /// The matched registry entry
220    pub entry: RegistryEntry,
221    /// Fuzzy matching score (higher is better)
222    pub score: i64,
223    /// Field that matched the search query (name, slug, or alias)
224    pub match_field: String,
225}
226
#[cfg(test)]
mod tests {
    // Unwrapping is fine in tests: a panic is the desired failure mode.
    #![allow(clippy::unwrap_used)]
    use super::*;

    /// Builds the React entry used by several tests (default aliases).
    fn react_entry() -> RegistryEntry {
        RegistryEntry::new(
            "React",
            "react",
            "JavaScript library for building user interfaces",
            "https://react.dev/llms.txt",
        )
    }

    /// Smoke check for queries whose exact outcome is matcher-dependent:
    /// the search must complete and yield at most one hit per entry.
    fn assert_bounded(registry: &Registry, results: &[RegistrySearchResult]) {
        assert!(results.len() <= registry.all_entries().len());
    }

    #[test]
    fn test_registry_entry_creation() {
        let entry = react_entry();

        assert_eq!(entry.name, "React");
        assert_eq!(entry.slug, "react");
        assert_eq!(entry.aliases, vec!["react"]);
        assert!(entry.description.contains("JavaScript library"));
        assert_eq!(entry.llms_url, "https://react.dev/llms.txt");
    }

    #[test]
    fn test_registry_entry_with_aliases() {
        let entry = RegistryEntry::new(
            "Node.js",
            "node",
            "JavaScript runtime",
            "https://nodejs.org/llms.txt",
        )
        .with_aliases(&["node", "nodejs", "js"]);

        assert_eq!(entry.aliases, vec!["node", "nodejs", "js"]);
    }

    #[test]
    fn test_registry_creation() {
        let registry = Registry::new();
        let entries = registry.all_entries();

        assert!(!entries.is_empty());

        // Check that we have some expected entries
        for slug in ["react", "node", "claude-code"] {
            assert!(
                entries.iter().any(|e| e.slug == slug),
                "Built-in registry should contain '{slug}'"
            );
        }
    }

    #[test]
    fn test_registry_search_exact_match() {
        let registry = Registry::new();
        let results = registry.search("react");

        assert!(!results.is_empty());
        // Should find React as top result
        assert_eq!(results[0].entry.slug, "react");
    }

    #[test]
    fn test_registry_search_fuzzy_match() {
        let registry = Registry::new();
        let results = registry.search("reactjs");

        assert!(!results.is_empty());
        // Should find React even with "reactjs" query
        assert!(results.iter().any(|r| r.entry.slug == "react"));
    }

    #[test]
    fn test_registry_search_partial_match() {
        let registry = Registry::new();
        let results = registry.search("claude");

        assert!(!results.is_empty());
        // Should find Claude-related entries
        assert!(results.iter().any(|r| r.entry.slug.contains("claude")));
    }

    #[test]
    fn test_registry_search_description_match() {
        let registry = Registry::new();
        let results = registry.search("javascript runtime");

        assert!(!results.is_empty());
        // Should find entries with "javascript runtime" in description
        let has_js_runtime = results.iter().any(|r| {
            let description = r.entry.description.to_lowercase();
            description.contains("javascript") && description.contains("runtime")
        });
        assert!(has_js_runtime);
    }

    #[test]
    fn test_registry_search_no_match() {
        let registry = Registry::new();
        let results = registry.search("nonexistentframework");

        // Should return empty results (or only weak hits) for a nonsensical query
        assert!(results.is_empty() || results[0].score < 50);
    }

    #[test]
    fn test_registry_search_case_insensitive() {
        let registry = Registry::new();

        // All spellings should find the same entry
        for query in ["react", "REACT", "React"] {
            let results = registry.search(query);
            assert!(!results.is_empty(), "Query '{query}' should find results");
            assert_eq!(results[0].entry.slug, "react");
        }
    }

    #[test]
    fn test_registry_display_format() {
        let display = react_entry().to_string();

        assert!(display.contains("React"));
        assert!(display.contains("(react)"));
        assert!(display.contains("JavaScript library"));
    }

    #[test]
    fn test_all_registry_entries_have_valid_urls() {
        let registry = Registry::new();

        for entry in registry.all_entries() {
            // Check that URL looks like a valid HTTP/HTTPS URL
            assert!(
                entry.llms_url.starts_with("http://") || entry.llms_url.starts_with("https://")
            );
            // Check that URL ends with .txt (case-insensitive)
            assert!(
                std::path::Path::new(&entry.llms_url)
                    .extension()
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"))
            );
            // Check that slug is kebab-case (no spaces, lowercase)
            assert!(!entry.slug.contains(' '));
            assert!(!entry.slug.chars().any(char::is_uppercase));
        }
    }

    #[test]
    fn test_registry_entries_have_unique_slugs() {
        let registry = Registry::new();

        let mut slugs = std::collections::HashSet::new();
        for entry in registry.all_entries() {
            assert!(
                slugs.insert(&entry.slug),
                "Duplicate slug found: {}",
                entry.slug
            );
        }
    }

    // Registry edge cases tests - Unicode and special characters
    #[test]
    fn test_registry_search_unicode_queries() {
        let registry = Registry::new();

        // CJK, Arabic (RTL), Cyrillic and emoji queries must not produce strong hits
        for query in ["日本語", "العربية", "русский", "🚀"] {
            let results = registry.search(query);
            assert!(
                results.is_empty() || results.iter().all(|r| r.score < 100),
                "Query '{query}' should not produce strong matches"
            );
        }

        // Mixed scripts might confuse the fuzzy matcher; just verify it doesn't crash
        let results = registry.search("react 日本語");
        assert_bounded(&registry, &results);
    }

    #[test]
    fn test_registry_search_very_long_queries() {
        let registry = Registry::new();

        // Test extremely long query
        let long_query = "javascript".repeat(1000);
        let results = registry.search(&long_query);

        // Should handle gracefully without crashing
        // May return empty or partial results due to fuzzy matching limits
        assert_bounded(&registry, &results);
    }

    #[test]
    fn test_registry_search_empty_and_whitespace() {
        let registry = Registry::new();

        // Test empty string
        assert!(registry.search("").is_empty());

        // Test whitespace-only queries
        for query in ["   ", "\t", "\n", "\r\n", " \t \n "] {
            let results = registry.search(query);
            assert!(
                results.is_empty(),
                "Whitespace query '{}' should return empty",
                query.escape_debug()
            );
        }
    }

    #[test]
    fn test_registry_search_special_characters() {
        let registry = Registry::new();

        // Test various punctuation and special characters
        let special_chars = [
            "!@#$%^&*()",
            "[]{}|\\;':\",./<>?",
            "~`",
            "react!",
            "node.js",
            "vue-js",
            "next/js",
            "c++",
            "c#",
            ".net",
            "node@18",
        ];

        for query in special_chars {
            // Special character queries might not match exact entries;
            // just verify that search doesn't crash and returns valid results
            let results = registry.search(query);
            assert_bounded(&registry, &results);
        }
    }

    #[test]
    fn test_registry_search_multiple_spaces() {
        let registry = Registry::new();

        // Test queries with multiple spaces
        let spaced_queries = [
            "javascript  runtime",
            "javascript   runtime",
            "   javascript runtime   ",
            "javascript\truntime",
            "javascript\n\nruntime",
        ];

        for query in spaced_queries {
            // The fuzzy matcher may or may not handle inner whitespace well;
            // just verify that search completes without crashing
            let results = registry.search(query);
            assert_bounded(&registry, &results);
        }
    }

    #[test]
    fn test_registry_search_leading_trailing_whitespace() {
        let registry = Registry::new();

        let query_variants = [
            "react",
            " react",
            "react ",
            " react ",
            "\treact\t",
            "\nreact\n",
            "  \t react \n  ",
        ];

        for query in query_variants {
            let results = registry.search(query);

            // All variants should find React
            assert!(
                !results.is_empty(),
                "Query '{}' should find results",
                query.escape_debug()
            );
            assert_eq!(results[0].entry.slug, "react");
        }
    }

    #[test]
    fn test_registry_search_fuzzy_matching_edge_cases() {
        let registry = Registry::new();

        // Note: Fuzzy matching has limits - not all typos will match
        let fuzzy_cases = [
            ("react", "react"),   // Exact match should work
            ("nodejs", "node"),   // Common alternative spelling
            ("nextjs", "nextjs"), // Exact match
            ("vue", "vue"),       // Exact match
        ];

        for (query, expected_slug) in fuzzy_cases {
            let results = registry.search(query);

            assert!(
                !results.is_empty(),
                "Query '{query}' should find results for '{expected_slug}'"
            );

            // Should find the expected entry for exact or close matches
            assert!(
                results.iter().any(|r| r.entry.slug == expected_slug),
                "Query '{query}' should find entry '{expected_slug}'"
            );
        }

        // Test that typos don't crash the search
        for query in ["reactt", "reac", "raect", "nxtjs", "vue.js"] {
            let results = registry.search(query);
            assert_bounded(&registry, &results);
        }
    }

    #[test]
    fn test_registry_search_score_ranking() {
        let registry = Registry::new();

        // React should be the top result for "react" query
        let results = registry.search("react");
        assert!(!results.is_empty());
        assert_eq!(results[0].entry.slug, "react");

        // The Node.js entry should be found with a high score for "node"
        let results = registry.search("node");
        assert!(!results.is_empty());

        let node_result = results.iter().find(|r| r.entry.slug == "node");
        assert!(node_result.is_some());

        let node_score = node_result.unwrap().score;
        assert!(
            node_score > 50,
            "Node.js should have high score for 'node' query"
        );
    }

    #[test]
    fn test_registry_search_alias_matching() {
        let registry = Registry::new();

        // Test searches that should match via aliases
        let alias_tests = [
            ("reactjs", "react"),
            ("nodejs", "node"),
            ("js", "node"),
            ("bunjs", "bun"),
            ("claude", "claude-code"),
            ("claude-api", "anthropic"),
            ("gpt", "openai"),
        ];

        for (query, expected_slug) in alias_tests {
            let results = registry.search(query);

            assert!(!results.is_empty(), "Query '{query}' should find results");

            let found_entry = results.iter().find(|r| r.entry.slug == expected_slug);
            assert!(
                found_entry.is_some(),
                "Query '{query}' should find entry '{expected_slug}'"
            );

            // The hit must come from an identifier field, not the description
            let found = found_entry.unwrap();
            assert!(
                matches!(found.match_field.as_str(), "alias" | "slug" | "name"),
                "Match field should indicate alias/slug/name match for '{}' -> '{}', got '{}'",
                query,
                expected_slug,
                found.match_field
            );
        }
    }

    #[test]
    fn test_registry_search_case_variations() {
        let registry = Registry::new();

        let mut all_scores = Vec::new();

        for query in ["REACT", "React", "rEaCt", "react"] {
            let results = registry.search(query);
            assert!(!results.is_empty(), "Query '{query}' should find results");
            assert_eq!(results[0].entry.slug, "react");
            all_scores.push(results[0].score);
        }

        // All case variations should produce similar scores
        let min_score = *all_scores.iter().min().unwrap();
        let max_score = *all_scores.iter().max().unwrap();

        assert!(
            (max_score - min_score) <= 50,
            "Case variations should have similar scores"
        );
    }

    #[test]
    fn test_registry_search_performance() {
        let registry = Registry::new();

        // Test that search performance is reasonable even with many queries
        let queries = [
            "react",
            "node",
            "vue",
            "angular",
            "javascript",
            "typescript",
            "python",
            "rust",
            "go",
            "java",
            "c++",
            "c#",
            "nonexistent",
            "blahblahblah",
            "qwerty",
            "asdfgh",
        ];

        let start_time = std::time::Instant::now();

        for query in queries {
            let results = registry.search(query);
            // Ensure we actually process the results
            assert_bounded(&registry, &results);
        }

        let elapsed = start_time.elapsed();

        // Wall-clock limits are unreliable on loaded or unoptimised CI runs,
        // so the bound is generous: it only guards against pathological
        // slowdowns, not small regressions.
        assert!(
            elapsed < std::time::Duration::from_secs(1),
            "Registry search should be fast, took {elapsed:?}"
        );
    }

    #[test]
    fn test_registry_search_boundary_conditions() {
        let registry = Registry::new();

        // Single characters might match multiple entries or none
        for char_query in ["a", "j", "r", "n", "v"] {
            let results = registry.search(char_query);
            assert_bounded(&registry, &results);
        }

        // Test maximum reasonable query length
        let max_query = "a".repeat(1000);
        let results = registry.search(&max_query);
        assert_bounded(&registry, &results);

        // Test query with only punctuation
        let punct_results = registry.search("!@#$%^&*()");
        assert!(punct_results.is_empty() || punct_results.iter().all(|r| r.score < 50));
    }

    #[test]
    fn test_registry_search_description_weighting() {
        // Built-in registry: results must come back sorted by score.
        let registry = Registry::new();
        let results = registry.search("documentation");
        for pair in results.windows(2) {
            assert!(
                pair[0].score >= pair[1].score,
                "Results should be sorted by score descending"
            );
        }

        // Controlled registry: the same text matched once as a name and once
        // as a description, so the raw matcher score is identical and only
        // the description weighting can make the two results differ.
        let by_name = RegistryEntry::new("documentation", "aaa", "x", "https://a.test/llms.txt");
        let by_description =
            RegistryEntry::new("zzz", "bbb", "documentation", "https://b.test/llms.txt");
        let registry = Registry::from_entries(vec![by_description, by_name]);

        let results = registry.search("documentation");
        assert_eq!(results.len(), 2);

        // The name match outranks the description match despite being listed second.
        assert_eq!(results[0].entry.slug, "aaa");
        assert_eq!(results[0].match_field, "name");
        assert_eq!(results[1].entry.slug, "bbb");
        assert_eq!(results[1].match_field, "description");
        assert_eq!(
            results[1].score,
            results[0].score / 2,
            "Description matches should carry half of the raw score"
        );
    }
}