sklears_core/
search_engines.rs

1use serde::{Deserialize, Serialize};
2use std::collections::{HashMap, HashSet};
3
4use crate::api_data_structures::{CodeExample, TraitInfo, TypeInfo};
5
/// Tunable settings for [`SemanticSearchEngine`].
///
/// The defaults are defined by the `Default` implementation for this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchEngineConfig {
    /// Flag for semantic search.
    pub semantic_search_enabled: bool,
    /// Flag for type-signature search.
    pub type_search_enabled: bool,
    /// Flag for fuzzy matching.
    pub fuzzy_matching_enabled: bool,
    /// Flag for autocomplete.
    pub autocomplete_enabled: bool,
    /// Upper bound on the number of results `SemanticSearchEngine::search` returns.
    pub max_results: usize,
    /// Minimum similarity (1 - cosine distance) an item needs to be returned by
    /// the semantic search path.
    pub similarity_threshold: f64,
    /// Indexing batch size setting.
    // NOTE(review): no code visible in this file reads this value.
    pub indexing_batch_size: usize,
    /// Cache size setting.
    // NOTE(review): presumably a number of cached queries; confirm the unit.
    pub cache_size: usize,
}
17
18impl Default for SearchEngineConfig {
19    fn default() -> Self {
20        Self {
21            semantic_search_enabled: true,
22            type_search_enabled: true,
23            fuzzy_matching_enabled: true,
24            autocomplete_enabled: true,
25            max_results: 50,
26            similarity_threshold: 0.3,
27            indexing_batch_size: 1000,
28            cache_size: 10000,
29        }
30    }
31}
32
/// A single search request handed to `SemanticSearchEngine::search`.
///
/// The `Debug` rendering of the whole query is used as the cache key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchQuery {
    /// The text to search for.
    pub text: String,
    /// Selects which search strategy `search` dispatches to.
    pub query_type: SearchQueryType,
    /// Restrictions applied to the raw results before ranking.
    pub filters: SearchFilters,
    /// Additional matching options.
    // NOTE(review): no code visible in this file reads these options.
    pub options: SearchOptions,
}
40
/// Strategy selector for a [`SearchQuery`].
///
/// `SemanticSearchEngine::search` routes `Semantic`, `TypeSignature` and `Usage`
/// to dedicated search paths; every other variant goes through the general
/// word-index search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SearchQueryType {
    /// Word-index based search.
    General,
    /// Embedding-similarity search.
    Semantic,
    /// Substring search over indexed type signatures.
    TypeSignature,
    /// Search over the usage index.
    Usage,
    /// Handled by the general search path.
    Documentation,
    /// Handled by the general search path.
    Examples,
}
50
/// Optional restrictions on search results.
///
/// The derived `Default` leaves every list empty.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SearchFilters {
    /// Accepted item categories; an empty list places no restriction.
    pub categories: Vec<ItemCategory>,
    /// Visibility levels to filter by.
    pub visibility: Vec<Visibility>,
    /// Stability levels to filter by.
    pub stability: Vec<Stability>,
    /// Accepted crate names (compared with `SearchResultMetadata::crate_name`);
    /// an empty list places no restriction.
    pub crates: Vec<String>,
    /// Module paths to filter by.
    pub modules: Vec<String>,
}
59
/// Kind of item an index entry, search result or autocomplete suggestion refers to.
///
/// Derives `PartialEq` so categories can be compared when filtering results.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ItemCategory {
    /// A trait.
    Trait,
    /// A struct.
    Struct,
    /// An enum.
    Enum,
    /// A function; also the category assigned to indexed code examples and
    /// usage-search results in this file.
    Function,
    /// A method.
    Method,
    /// A constant.
    Constant,
    /// A type; used in this file for unions and type aliases.
    Type,
    /// A module.
    Module,
    /// A macro.
    Macro,
}
72
/// Visibility level recorded in [`SearchResultMetadata`].
///
/// Every item indexed by this file is recorded as `Public`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Visibility {
    /// Publicly visible.
    Public,
    /// Private.
    Private,
    /// Crate-level visibility.
    Crate,
    /// Visibility limited to the parent (`super`) scope.
    Super,
}
80
/// Stability level recorded in [`SearchResultMetadata`].
///
/// Every item indexed by this file is recorded as `Stable`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Stability {
    /// Stable API.
    Stable,
    /// Unstable API.
    Unstable,
    /// Deprecated API.
    Deprecated,
    /// Experimental API.
    Experimental,
}
88
/// Matching options carried by a [`SearchQuery`].
// NOTE(review): no code visible in this file reads any of these options; the
// per-field descriptions below only restate the field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchOptions {
    /// Case-sensitive matching flag.
    pub case_sensitive: bool,
    /// Whole-word matching flag.
    pub whole_words_only: bool,
    /// Stemming flag.
    pub use_stemming: bool,
    /// Flag for including examples.
    pub include_examples: bool,
    /// Flag for including tests.
    pub include_tests: bool,
    /// Flag for ranking by usage.
    pub rank_by_usage: bool,
}
98
99impl Default for SearchOptions {
100    fn default() -> Self {
101        Self {
102            case_sensitive: false,
103            whole_words_only: false,
104            use_stemming: true,
105            include_examples: true,
106            include_tests: false,
107            rank_by_usage: true,
108        }
109    }
110}
111
/// One hit returned by `SemanticSearchEngine::search`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Identifier of the hit; for index-backed results this is the indexed item's id.
    pub id: String,
    /// Display title; also the tie-breaker when two results have equal scores.
    pub title: String,
    /// Description text of the hit.
    pub description: String,
    /// Category of the hit.
    pub category: ItemCategory,
    /// Link target; index-backed results use `/docs/{id}`.
    pub url: String,
    /// Relevance score; results are ranked by descending score.
    pub score: f64,
    /// Optional excerpt shown alongside the hit.
    pub snippet: Option<String>,
    /// Origin and status information of the hit.
    pub metadata: SearchResultMetadata,
}
123
/// Origin and status information attached to indexed items and search results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResultMetadata {
    /// Name of the crate the item is attributed to; compared against
    /// `SearchFilters::crates` when filtering.
    pub crate_name: String,
    /// Module path of the item.
    pub module_path: String,
    /// Source line, when known; the indexing code in this file always stores `None`.
    pub line_number: Option<usize>,
    /// Visibility level of the item.
    pub visibility: Visibility,
    /// Stability level of the item.
    pub stability: Stability,
    /// Version the item was introduced in, when known.
    pub since_version: Option<String>,
    /// Version the item was deprecated in, when known.
    pub deprecated_since: Option<String>,
    /// Names of related items (e.g. trait implementations for an indexed trait).
    pub related_items: Vec<String>,
}
135
/// All lookup structures used by [`SemanticSearchEngine`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchIndex {
    /// Indexed items keyed by item id.
    pub items: HashMap<String, IndexedItem>,
    /// Lowercased word -> ids of the items that word was indexed for.
    pub word_index: HashMap<String, HashSet<String>>,
    /// Type-signature string -> ids of the items with that signature.
    pub type_index: HashMap<String, HashSet<String>>,
    /// Usage information; the usage search treats each key as a usage pattern.
    pub usage_index: HashMap<String, UsageInfo>,
    /// Embeddings and clusters used by the semantic search path.
    pub semantic_index: SemanticIndex,
    /// Prefix tree backing autocomplete suggestions.
    pub autocomplete_trie: AutocompleteTrie,
    /// Timestamp stored with the index; set to the current UTC time by `SearchIndex::new`.
    pub last_updated: chrono::DateTime<chrono::Utc>,
}
146
/// A single entry of the search index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexedItem {
    /// Unique id; the indexing code uses `trait_{name}`, `type_{name}` and `example_{idx}`.
    pub id: String,
    /// Searchable text; used for embeddings, text-match scoring and snippets.
    pub content: String,
    /// Category of the item.
    pub category: ItemCategory,
    /// Lowercased keywords extracted from the item's description.
    pub keywords: Vec<String>,
    /// Type signature, when the item has one.
    pub type_signature: Option<String>,
    /// Documentation text; returned as the description of search results.
    pub documentation: String,
    /// Example code associated with the item.
    pub examples: Vec<String>,
    /// Origin and status information.
    pub metadata: SearchResultMetadata,
    /// Usage counter; the base of the popularity score. Initialized to 0 by the
    /// indexing code in this file.
    pub usage_count: usize,
    /// Popularity score computed from `usage_count`, documentation length and
    /// the presence of examples.
    pub popularity_score: f64,
}
160
/// Usage information stored per key in `SearchIndex::usage_index`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageInfo {
    /// Usage frequency; used as the score of usage-search results.
    pub frequency: usize,
    /// Concrete usage sites; the usage search emits one result per context.
    pub contexts: Vec<UsageContext>,
    /// Pattern strings that the usage search also matches the query text against.
    pub common_patterns: Vec<String>,
    /// Related functions; copied into the `related_items` of usage-search results.
    pub related_functions: Vec<String>,
}
168
/// One concrete usage site belonging to a [`UsageInfo`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageContext {
    /// Location of the usage; becomes the `url` of the usage-search result.
    pub location: String,
    /// Code excerpt; becomes the `snippet` of the usage-search result.
    pub snippet: String,
    /// Description; becomes the `description` of the usage-search result.
    pub description: String,
}
175
/// Data backing the semantic (embedding-based) search path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticIndex {
    /// Embedding vector per item id.
    pub embeddings: HashMap<String, Vec<f32>>,
    /// Clusters built over the embeddings.
    pub clusters: Vec<SemanticCluster>,
    /// Nested map of pairwise similarity values.
    // NOTE(review): no code visible in this file fills or reads this map.
    pub similarity_matrix: HashMap<String, HashMap<String, f64>>,
}
182
/// A group of items whose embeddings were assigned to the same cluster center.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticCluster {
    /// Cluster id; the clustering code uses `cluster_{i}`.
    pub id: String,
    /// Center vector that item embeddings are compared against.
    pub center: Vec<f32>,
    /// Ids of the items assigned to this cluster.
    pub items: Vec<String>,
    /// Coherence score; the clustering code in this file stores 0.0.
    pub coherence_score: f64,
}
190
/// Character-level prefix tree used to serve autocomplete suggestions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutocompleteTrie {
    /// Root node; corresponds to the empty prefix.
    pub root: TrieNode,
    /// Suggestions keyed by prefix; consulted first by `get_suggestions`.
    // NOTE(review): no code visible in this file inserts into this cache.
    pub suggestions_cache: HashMap<String, Vec<AutocompleteSuggestion>>,
}
196
/// A node of the [`AutocompleteTrie`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrieNode {
    /// Character stored at this node. `TrieNode::new` sets it to `None`, and
    /// `AutocompleteTrie::insert` creates nodes through `Default` without
    /// assigning it.
    pub value: Option<char>,
    /// Child nodes keyed by the next character.
    pub children: HashMap<char, TrieNode>,
    /// `true` when at least one inserted word ends at this node.
    pub is_end_of_word: bool,
    /// Suggestions stored for the words that end at this node.
    pub completions: Vec<AutocompleteSuggestion>,
    /// Number of insertions that ended at this node.
    pub frequency: usize,
}
205
/// A single autocomplete candidate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutocompleteSuggestion {
    /// Suggested text.
    pub text: String,
    /// Category of the suggested item.
    pub category: ItemCategory,
    /// Description of the suggested item.
    pub description: String,
    /// Frequency; secondary sort key (descending) in `get_suggestions`.
    pub frequency: usize,
    /// Relevance; primary sort key (descending) in `get_suggestions`.
    pub relevance_score: f64,
}
214
/// Search engine over indexed traits, types and code examples.
///
/// Owns the index, a per-query result cache and runtime metrics.
pub struct SemanticSearchEngine {
    /// Engine settings supplied at construction time.
    config: SearchEngineConfig,
    /// Lookup structures populated by `build_index`.
    index: SearchIndex,
    /// Results of earlier queries, keyed by the `Debug` rendering of the query.
    query_cache: HashMap<String, Vec<SearchResult>>,
    /// Counters exposed through `get_search_metrics`.
    performance_metrics: SearchMetrics,
}
221
/// Runtime counters reported by `SemanticSearchEngine::get_search_metrics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchMetrics {
    /// Number of queries counted by `SemanticSearchEngine::search`.
    pub total_queries: usize,
    /// Number of queries answered from the query cache.
    pub cache_hits: usize,
    /// Average response time.
    // NOTE(review): no code visible in this file updates this value, and its
    // unit is not established here.
    pub average_response_time: f64,
    /// Size of the index.
    pub index_size: usize,
    /// Time of the last re-index; initialized to the current UTC time by `Default`.
    pub last_reindex_time: chrono::DateTime<chrono::Utc>,
}
230
231impl SemanticSearchEngine {
232    pub fn new(config: SearchEngineConfig) -> Self {
233        Self {
234            config,
235            index: SearchIndex::new(),
236            query_cache: HashMap::new(),
237            performance_metrics: SearchMetrics::default(),
238        }
239    }
240
241    pub fn build_index(
242        &mut self,
243        traits: &[TraitInfo],
244        types: &[TypeInfo],
245        examples: &[CodeExample],
246    ) -> Result<(), Box<dyn std::error::Error>> {
247        self.index_traits(traits)?;
248        self.index_types(types)?;
249        self.index_examples(examples)?;
250        self.build_semantic_embeddings()?;
251        self.build_autocomplete_trie()?;
252        self.update_usage_statistics()?;
253        Ok(())
254    }
255
256    fn index_traits(&mut self, traits: &[TraitInfo]) -> Result<(), Box<dyn std::error::Error>> {
257        for trait_info in traits {
258            let item = IndexedItem {
259                id: format!("trait_{}", trait_info.name),
260                content: format!("{} {}", trait_info.name, trait_info.description),
261                category: ItemCategory::Trait,
262                keywords: self.extract_keywords(&trait_info.description),
263                type_signature: Some(self.build_trait_signature(trait_info)),
264                documentation: trait_info.description.clone(),
265                examples: vec![], // Examples not directly available in TraitInfo
266                metadata: SearchResultMetadata {
267                    crate_name: "api".to_string(), // Default crate name
268                    module_path: trait_info.path.clone(),
269                    line_number: None,
270                    visibility: Visibility::Public,
271                    stability: Stability::Stable,
272                    since_version: None, // Not available in TraitInfo
273                    deprecated_since: None,
274                    related_items: trait_info.implementations.clone(),
275                },
276                usage_count: 0,
277                popularity_score: 0.0,
278            };
279
280            let item_id = item.id.clone();
281            self.index.items.insert(item_id.clone(), item);
282            self.index_words(&trait_info.name, &item_id);
283        }
284        Ok(())
285    }
286
287    fn index_types(&mut self, types: &[TypeInfo]) -> Result<(), Box<dyn std::error::Error>> {
288        for type_info in types {
289            let item = IndexedItem {
290                id: format!("type_{}", type_info.name),
291                content: format!("{} {}", type_info.name, type_info.description),
292                category: self.determine_type_category(type_info),
293                keywords: self.extract_keywords(&type_info.description),
294                type_signature: Some(format!("{:?}", type_info.kind)), // Convert enum to string
295                documentation: type_info.description.clone(),
296                examples: vec![], // Examples not directly available in TypeInfo
297                metadata: SearchResultMetadata {
298                    crate_name: "api".to_string(), // Default crate name
299                    module_path: type_info.path.clone(),
300                    line_number: None,
301                    visibility: Visibility::Public,
302                    stability: Stability::Stable,
303                    since_version: None, // Not available in TypeInfo
304                    deprecated_since: None,
305                    related_items: type_info.trait_impls.clone(),
306                },
307                usage_count: 0,
308                popularity_score: 0.0,
309            };
310
311            let item_id = item.id.clone();
312            self.index.items.insert(item_id.clone(), item);
313            self.index_words(&type_info.name, &item_id);
314            self.index_type_signature(&format!("{:?}", type_info.kind), &item_id);
315        }
316        Ok(())
317    }
318
319    fn index_examples(
320        &mut self,
321        examples: &[CodeExample],
322    ) -> Result<(), Box<dyn std::error::Error>> {
323        for (idx, example) in examples.iter().enumerate() {
324            let item = IndexedItem {
325                id: format!("example_{}", idx),
326                content: format!("{} {}", example.title, example.code),
327                category: ItemCategory::Function,
328                keywords: self.extract_keywords(&example.description),
329                type_signature: None,
330                documentation: example.description.clone(),
331                examples: vec![example.code.clone()],
332                metadata: SearchResultMetadata {
333                    crate_name: "examples".to_string(),
334                    module_path: "examples".to_string(),
335                    line_number: None,
336                    visibility: Visibility::Public,
337                    stability: Stability::Stable,
338                    since_version: None,
339                    deprecated_since: None,
340                    related_items: vec![],
341                },
342                usage_count: 0,
343                popularity_score: 0.0,
344            };
345
346            let item_id = item.id.clone();
347            self.index.items.insert(item_id.clone(), item);
348            self.index_words(&example.title, &item_id);
349            self.index_words(&example.description, &item_id);
350        }
351        Ok(())
352    }
353
354    fn build_trait_signature(&self, trait_info: &TraitInfo) -> String {
355        format!(
356            "trait {}{}",
357            trait_info.name,
358            if trait_info.generics.is_empty() {
359                String::new()
360            } else {
361                format!("<{}>", trait_info.generics.join(", "))
362            }
363        )
364    }
365
366    fn determine_type_category(&self, type_info: &TypeInfo) -> ItemCategory {
367        use crate::api_data_structures::TypeKind;
368        match type_info.kind {
369            TypeKind::Struct => ItemCategory::Struct,
370            TypeKind::Enum => ItemCategory::Enum,
371            TypeKind::Union => ItemCategory::Type,
372            TypeKind::TypeAlias => ItemCategory::Type,
373            TypeKind::Trait => ItemCategory::Trait,
374        }
375    }
376
377    fn extract_keywords(&self, text: &str) -> Vec<String> {
378        text.split_whitespace()
379            .map(|word| word.to_lowercase())
380            .filter(|word| word.len() > 2)
381            .collect()
382    }
383
384    fn index_words(&mut self, text: &str, item_id: &str) {
385        for word in text.split_whitespace() {
386            let word = word.to_lowercase();
387            self.index
388                .word_index
389                .entry(word)
390                .or_default()
391                .insert(item_id.to_string());
392        }
393    }
394
395    fn index_type_signature(&mut self, type_sig: &str, item_id: &str) {
396        self.index
397            .type_index
398            .entry(type_sig.to_string())
399            .or_default()
400            .insert(item_id.to_string());
401    }
402
403    fn build_semantic_embeddings(&mut self) -> Result<(), Box<dyn std::error::Error>> {
404        for (item_id, item) in &self.index.items {
405            let embedding = self.compute_embedding(&item.content);
406            self.index
407                .semantic_index
408                .embeddings
409                .insert(item_id.clone(), embedding);
410        }
411        self.build_semantic_clusters()?;
412        Ok(())
413    }
414
415    fn compute_embedding(&self, text: &str) -> Vec<f32> {
416        let words: Vec<&str> = text.split_whitespace().collect();
417        let mut embedding = vec![0.0; 300];
418
419        for (i, word) in words.iter().enumerate().take(300) {
420            embedding[i] = word.len() as f32;
421        }
422
423        embedding
424    }
425
426    fn build_semantic_clusters(&mut self) -> Result<(), Box<dyn std::error::Error>> {
427        let embeddings: Vec<(String, Vec<f32>)> = self
428            .index
429            .semantic_index
430            .embeddings
431            .iter()
432            .map(|(k, v)| (k.clone(), v.clone()))
433            .collect();
434
435        let clusters = self.k_means_clustering(&embeddings, 10)?;
436        self.index.semantic_index.clusters = clusters;
437        Ok(())
438    }
439
440    fn k_means_clustering(
441        &self,
442        embeddings: &[(String, Vec<f32>)],
443        k: usize,
444    ) -> Result<Vec<SemanticCluster>, Box<dyn std::error::Error>> {
445        let mut clusters = Vec::new();
446        let embedding_dim = embeddings.first().map(|(_, e)| e.len()).unwrap_or(300);
447
448        for i in 0..k {
449            clusters.push(SemanticCluster {
450                id: format!("cluster_{}", i),
451                center: vec![0.0; embedding_dim],
452                items: Vec::new(),
453                coherence_score: 0.0,
454            });
455        }
456
457        for (item_id, embedding) in embeddings {
458            let closest_cluster = self.find_closest_cluster(&clusters, embedding);
459            clusters[closest_cluster].items.push(item_id.clone());
460        }
461
462        Ok(clusters)
463    }
464
465    fn find_closest_cluster(&self, clusters: &[SemanticCluster], embedding: &[f32]) -> usize {
466        clusters
467            .iter()
468            .enumerate()
469            .min_by(|(_, a), (_, b)| {
470                let dist_a = self.cosine_distance(&a.center, embedding);
471                let dist_b = self.cosine_distance(&b.center, embedding);
472                dist_a
473                    .partial_cmp(&dist_b)
474                    .unwrap_or(std::cmp::Ordering::Equal)
475            })
476            .map(|(i, _)| i)
477            .unwrap_or(0)
478    }
479
480    fn cosine_distance(&self, a: &[f32], b: &[f32]) -> f64 {
481        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
482        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
483        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
484
485        if norm_a == 0.0 || norm_b == 0.0 {
486            1.0
487        } else {
488            1.0 - (dot_product / (norm_a * norm_b)) as f64
489        }
490    }
491
492    fn build_autocomplete_trie(&mut self) -> Result<(), Box<dyn std::error::Error>> {
493        let mut trie = AutocompleteTrie::new();
494
495        for (item_id, item) in &self.index.items {
496            let suggestion = AutocompleteSuggestion {
497                text: item_id.clone(),
498                category: item.category.clone(),
499                description: item.documentation.clone(),
500                frequency: item.usage_count,
501                relevance_score: item.popularity_score,
502            };
503            trie.insert(item_id, suggestion);
504        }
505
506        self.index.autocomplete_trie = trie;
507        Ok(())
508    }
509
510    fn update_usage_statistics(&mut self) -> Result<(), Box<dyn std::error::Error>> {
511        let scores: Vec<(String, f64)> = self
512            .index
513            .items
514            .iter()
515            .map(|(id, item)| (id.clone(), self.calculate_popularity_score(item)))
516            .collect();
517
518        for (item_id, score) in scores {
519            if let Some(item) = self.index.items.get_mut(&item_id) {
520                item.popularity_score = score;
521            }
522        }
523        Ok(())
524    }
525
526    fn calculate_popularity_score(&self, item: &IndexedItem) -> f64 {
527        let base_score = item.usage_count as f64;
528        let documentation_score = if item.documentation.len() > 100 {
529            1.5
530        } else {
531            1.0
532        };
533        let examples_score = if !item.examples.is_empty() { 1.3 } else { 1.0 };
534
535        base_score * documentation_score * examples_score
536    }
537
538    pub fn search(
539        &mut self,
540        query: &SearchQuery,
541    ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
542        let cache_key = self.build_cache_key(query);
543
544        if let Some(cached_results) = self.query_cache.get(&cache_key) {
545            self.performance_metrics.cache_hits += 1;
546            return Ok(cached_results.clone());
547        }
548
549        let results = match query.query_type {
550            SearchQueryType::Semantic => self.semantic_search(query)?,
551            SearchQueryType::TypeSignature => self.type_search(query)?,
552            SearchQueryType::Usage => self.usage_search(query)?,
553            _ => self.general_search(query)?,
554        };
555
556        let filtered_results = self.apply_filters(&results, &query.filters);
557        let ranked_results = self.rank_results(filtered_results, query);
558
559        let final_results: Vec<SearchResult> = ranked_results
560            .into_iter()
561            .take(self.config.max_results)
562            .collect();
563
564        self.query_cache.insert(cache_key, final_results.clone());
565        self.performance_metrics.total_queries += 1;
566
567        Ok(final_results)
568    }
569
570    fn build_cache_key(&self, query: &SearchQuery) -> String {
571        format!("{:?}", query)
572    }
573
574    fn semantic_search(
575        &self,
576        query: &SearchQuery,
577    ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
578        let query_embedding = self.compute_embedding(&query.text);
579        let mut results = Vec::new();
580
581        for (item_id, item_embedding) in &self.index.semantic_index.embeddings {
582            let similarity = 1.0 - self.cosine_distance(&query_embedding, item_embedding);
583
584            if similarity >= self.config.similarity_threshold {
585                if let Some(item) = self.index.items.get(item_id) {
586                    results.push(SearchResult {
587                        id: item_id.clone(),
588                        title: item_id.clone(),
589                        description: item.documentation.clone(),
590                        category: item.category.clone(),
591                        url: format!("/docs/{}", item_id),
592                        score: similarity,
593                        snippet: self.generate_snippet(&item.content, &query.text),
594                        metadata: item.metadata.clone(),
595                    });
596                }
597            }
598        }
599
600        Ok(results)
601    }
602
603    fn type_search(
604        &self,
605        query: &SearchQuery,
606    ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
607        let mut results = Vec::new();
608
609        for (type_sig, item_ids) in &self.index.type_index {
610            if type_sig.contains(&query.text) {
611                for item_id in item_ids {
612                    if let Some(item) = self.index.items.get(item_id) {
613                        results.push(SearchResult {
614                            id: item_id.clone(),
615                            title: item_id.clone(),
616                            description: item.documentation.clone(),
617                            category: item.category.clone(),
618                            url: format!("/docs/{}", item_id),
619                            score: self.calculate_type_match_score(type_sig, &query.text),
620                            snippet: item.type_signature.clone(),
621                            metadata: item.metadata.clone(),
622                        });
623                    }
624                }
625            }
626        }
627
628        Ok(results)
629    }
630
631    fn usage_search(
632        &self,
633        query: &SearchQuery,
634    ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
635        let mut results = Vec::new();
636
637        for (pattern, usage_info) in &self.index.usage_index {
638            if pattern.contains(&query.text)
639                || usage_info
640                    .common_patterns
641                    .iter()
642                    .any(|p| p.contains(&query.text))
643            {
644                for context in &usage_info.contexts {
645                    results.push(SearchResult {
646                        id: format!("usage_{}", pattern),
647                        title: format!("Usage: {}", pattern),
648                        description: context.description.clone(),
649                        category: ItemCategory::Function,
650                        url: context.location.clone(),
651                        score: usage_info.frequency as f64,
652                        snippet: Some(context.snippet.clone()),
653                        metadata: SearchResultMetadata {
654                            crate_name: "usage".to_string(),
655                            module_path: pattern.clone(),
656                            line_number: None,
657                            visibility: Visibility::Public,
658                            stability: Stability::Stable,
659                            since_version: None,
660                            deprecated_since: None,
661                            related_items: usage_info.related_functions.clone(),
662                        },
663                    });
664                }
665            }
666        }
667
668        Ok(results)
669    }
670
671    fn general_search(
672        &self,
673        query: &SearchQuery,
674    ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
675        let mut results = Vec::new();
676        let query_words: Vec<String> = query
677            .text
678            .split_whitespace()
679            .map(|w| w.to_lowercase())
680            .collect();
681
682        for word in &query_words {
683            if let Some(item_ids) = self.index.word_index.get(word) {
684                for item_id in item_ids {
685                    if let Some(item) = self.index.items.get(item_id) {
686                        let score = self.calculate_text_match_score(&item.content, &query.text);
687
688                        results.push(SearchResult {
689                            id: item_id.clone(),
690                            title: item_id.clone(),
691                            description: item.documentation.clone(),
692                            category: item.category.clone(),
693                            url: format!("/docs/{}", item_id),
694                            score,
695                            snippet: self.generate_snippet(&item.content, &query.text),
696                            metadata: item.metadata.clone(),
697                        });
698                    }
699                }
700            }
701        }
702
703        Ok(results)
704    }
705
706    fn calculate_type_match_score(&self, type_sig: &str, query: &str) -> f64 {
707        if type_sig == query {
708            1.0
709        } else if type_sig.contains(query) {
710            0.8
711        } else {
712            0.3
713        }
714    }
715
716    fn calculate_text_match_score(&self, content: &str, query: &str) -> f64 {
717        let content_lower = content.to_lowercase();
718        let query_lower = query.to_lowercase();
719
720        if content_lower.contains(&query_lower) {
721            let exact_matches = content_lower.matches(&query_lower).count();
722            let word_count = content.split_whitespace().count();
723            (exact_matches as f64) / (word_count as f64).max(1.0)
724        } else {
725            0.1
726        }
727    }
728
729    fn generate_snippet(&self, content: &str, query: &str) -> Option<String> {
730        let query_lower = query.to_lowercase();
731        let content_lower = content.to_lowercase();
732
733        if let Some(pos) = content_lower.find(&query_lower) {
734            let start = pos.saturating_sub(50);
735            let end = (pos + query.len() + 50).min(content.len());
736            Some(content[start..end].to_string())
737        } else {
738            Some(content.chars().take(100).collect())
739        }
740    }
741
742    fn apply_filters(
743        &self,
744        results: &[SearchResult],
745        filters: &SearchFilters,
746    ) -> Vec<SearchResult> {
747        results
748            .iter()
749            .filter(|result| {
750                if !filters.categories.is_empty() && !filters.categories.contains(&result.category)
751                {
752                    return false;
753                }
754                if !filters.crates.is_empty()
755                    && !filters.crates.contains(&result.metadata.crate_name)
756                {
757                    return false;
758                }
759                true
760            })
761            .cloned()
762            .collect()
763    }
764
765    fn rank_results(
766        &self,
767        mut results: Vec<SearchResult>,
768        _query: &SearchQuery,
769    ) -> Vec<SearchResult> {
770        results.sort_by(|a, b| {
771            let score_cmp = b
772                .score
773                .partial_cmp(&a.score)
774                .unwrap_or(std::cmp::Ordering::Equal);
775            if score_cmp != std::cmp::Ordering::Equal {
776                return score_cmp;
777            }
778
779            a.title.cmp(&b.title)
780        });
781
782        results
783    }
784
785    pub fn get_autocomplete_suggestions(
786        &self,
787        prefix: &str,
788        limit: usize,
789    ) -> Vec<AutocompleteSuggestion> {
790        self.index.autocomplete_trie.get_suggestions(prefix, limit)
791    }
792
793    pub fn get_search_metrics(&self) -> &SearchMetrics {
794        &self.performance_metrics
795    }
796}
797
798impl Default for SearchIndex {
799    fn default() -> Self {
800        Self::new()
801    }
802}
803
804impl SearchIndex {
805    pub fn new() -> Self {
806        Self {
807            items: HashMap::new(),
808            word_index: HashMap::new(),
809            type_index: HashMap::new(),
810            usage_index: HashMap::new(),
811            semantic_index: SemanticIndex::new(),
812            autocomplete_trie: AutocompleteTrie::new(),
813            last_updated: chrono::Utc::now(),
814        }
815    }
816}
817
818impl Default for SemanticIndex {
819    fn default() -> Self {
820        Self::new()
821    }
822}
823
824impl SemanticIndex {
825    pub fn new() -> Self {
826        Self {
827            embeddings: HashMap::new(),
828            clusters: Vec::new(),
829            similarity_matrix: HashMap::new(),
830        }
831    }
832}
833
834impl Default for AutocompleteTrie {
835    fn default() -> Self {
836        Self::new()
837    }
838}
839
840impl AutocompleteTrie {
841    pub fn new() -> Self {
842        Self {
843            root: TrieNode::new(),
844            suggestions_cache: HashMap::new(),
845        }
846    }
847
848    pub fn insert(&mut self, word: &str, suggestion: AutocompleteSuggestion) {
849        let mut current = &mut self.root;
850
851        for ch in word.chars() {
852            current = current.children.entry(ch).or_default();
853        }
854
855        current.is_end_of_word = true;
856        current.completions.push(suggestion);
857        current.frequency += 1;
858    }
859
860    pub fn get_suggestions(&self, prefix: &str, limit: usize) -> Vec<AutocompleteSuggestion> {
861        if let Some(cached) = self.suggestions_cache.get(prefix) {
862            return cached.iter().take(limit).cloned().collect();
863        }
864
865        let mut current = &self.root;
866
867        for ch in prefix.chars() {
868            if let Some(child) = current.children.get(&ch) {
869                current = child;
870            } else {
871                return Vec::new();
872            }
873        }
874
875        let mut suggestions = Vec::new();
876        self.collect_suggestions(current, &mut suggestions);
877
878        suggestions.sort_by(|a, b| {
879            b.relevance_score
880                .partial_cmp(&a.relevance_score)
881                .unwrap_or(std::cmp::Ordering::Equal)
882                .then_with(|| b.frequency.cmp(&a.frequency))
883        });
884
885        suggestions.into_iter().take(limit).collect()
886    }
887
888    #[allow(clippy::only_used_in_recursion)]
889    fn collect_suggestions(&self, node: &TrieNode, suggestions: &mut Vec<AutocompleteSuggestion>) {
890        if node.is_end_of_word {
891            suggestions.extend(node.completions.iter().cloned());
892        }
893
894        for child in node.children.values() {
895            self.collect_suggestions(child, suggestions);
896        }
897    }
898}
899
900impl Default for TrieNode {
901    fn default() -> Self {
902        Self::new()
903    }
904}
905
906impl TrieNode {
907    pub fn new() -> Self {
908        Self {
909            value: None,
910            children: HashMap::new(),
911            is_end_of_word: false,
912            completions: Vec::new(),
913            frequency: 0,
914        }
915    }
916}
917
918impl Default for SearchMetrics {
919    fn default() -> Self {
920        Self {
921            total_queries: 0,
922            cache_hits: 0,
923            average_response_time: 0.0,
924            index_size: 0,
925            last_reindex_time: chrono::Utc::now(),
926        }
927    }
928}
929
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created engine keeps the configuration it was given.
    #[test]
    fn test_search_engine_creation() {
        let engine = SemanticSearchEngine::new(SearchEngineConfig::default());
        assert_eq!(engine.config.max_results, 50);
    }

    /// A word inserted into the trie is found through one of its prefixes.
    #[test]
    fn test_autocomplete_trie() {
        let mut trie = AutocompleteTrie::new();
        trie.insert(
            "test",
            AutocompleteSuggestion {
                text: "test".to_string(),
                category: ItemCategory::Function,
                description: "Test function".to_string(),
                frequency: 1,
                relevance_score: 1.0,
            },
        );

        let found = trie.get_suggestions("te", 10);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "test");
    }

    /// A query keeps the text and query type it was built with.
    #[test]
    fn test_search_query_creation() {
        let text = "linear regression".to_string();
        let query = SearchQuery {
            text,
            query_type: SearchQueryType::Semantic,
            filters: SearchFilters::default(),
            options: SearchOptions::default(),
        };

        assert_eq!(query.text, "linear regression");
        assert!(matches!(query.query_type, SearchQueryType::Semantic));
    }

    /// Orthogonal vectors have a cosine distance of 1.
    #[test]
    fn test_cosine_distance() {
        let engine = SemanticSearchEngine::new(SearchEngineConfig::default());
        let x_axis = [1.0_f32, 0.0, 0.0];
        let y_axis = [0.0_f32, 1.0, 0.0];

        let distance = engine.cosine_distance(&x_axis, &y_axis);
        assert!((distance - 1.0).abs() < 1e-6);
    }

    /// Embeddings always have 300 dimensions, whatever the input length.
    #[test]
    fn test_embedding_computation() {
        let engine = SemanticSearchEngine::new(SearchEngineConfig::default());
        let dimensions = engine.compute_embedding("test string").len();
        assert_eq!(dimensions, 300);
    }
}