1use serde::{Deserialize, Serialize};
2use std::collections::{HashMap, HashSet};
3
4use crate::api_data_structures::{CodeExample, TraitInfo, TypeInfo};
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct SearchEngineConfig {
8 pub semantic_search_enabled: bool,
9 pub type_search_enabled: bool,
10 pub fuzzy_matching_enabled: bool,
11 pub autocomplete_enabled: bool,
12 pub max_results: usize,
13 pub similarity_threshold: f64,
14 pub indexing_batch_size: usize,
15 pub cache_size: usize,
16}
17
18impl Default for SearchEngineConfig {
19 fn default() -> Self {
20 Self {
21 semantic_search_enabled: true,
22 type_search_enabled: true,
23 fuzzy_matching_enabled: true,
24 autocomplete_enabled: true,
25 max_results: 50,
26 similarity_threshold: 0.3,
27 indexing_batch_size: 1000,
28 cache_size: 10000,
29 }
30 }
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct SearchQuery {
35 pub text: String,
36 pub query_type: SearchQueryType,
37 pub filters: SearchFilters,
38 pub options: SearchOptions,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub enum SearchQueryType {
43 General,
44 Semantic,
45 TypeSignature,
46 Usage,
47 Documentation,
48 Examples,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize, Default)]
52pub struct SearchFilters {
53 pub categories: Vec<ItemCategory>,
54 pub visibility: Vec<Visibility>,
55 pub stability: Vec<Stability>,
56 pub crates: Vec<String>,
57 pub modules: Vec<String>,
58}
59
60#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
61pub enum ItemCategory {
62 Trait,
63 Struct,
64 Enum,
65 Function,
66 Method,
67 Constant,
68 Type,
69 Module,
70 Macro,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub enum Visibility {
75 Public,
76 Private,
77 Crate,
78 Super,
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub enum Stability {
83 Stable,
84 Unstable,
85 Deprecated,
86 Experimental,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct SearchOptions {
91 pub case_sensitive: bool,
92 pub whole_words_only: bool,
93 pub use_stemming: bool,
94 pub include_examples: bool,
95 pub include_tests: bool,
96 pub rank_by_usage: bool,
97}
98
99impl Default for SearchOptions {
100 fn default() -> Self {
101 Self {
102 case_sensitive: false,
103 whole_words_only: false,
104 use_stemming: true,
105 include_examples: true,
106 include_tests: false,
107 rank_by_usage: true,
108 }
109 }
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct SearchResult {
114 pub id: String,
115 pub title: String,
116 pub description: String,
117 pub category: ItemCategory,
118 pub url: String,
119 pub score: f64,
120 pub snippet: Option<String>,
121 pub metadata: SearchResultMetadata,
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct SearchResultMetadata {
126 pub crate_name: String,
127 pub module_path: String,
128 pub line_number: Option<usize>,
129 pub visibility: Visibility,
130 pub stability: Stability,
131 pub since_version: Option<String>,
132 pub deprecated_since: Option<String>,
133 pub related_items: Vec<String>,
134}
135
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct SearchIndex {
138 pub items: HashMap<String, IndexedItem>,
139 pub word_index: HashMap<String, HashSet<String>>,
140 pub type_index: HashMap<String, HashSet<String>>,
141 pub usage_index: HashMap<String, UsageInfo>,
142 pub semantic_index: SemanticIndex,
143 pub autocomplete_trie: AutocompleteTrie,
144 pub last_updated: chrono::DateTime<chrono::Utc>,
145}
146
147#[derive(Debug, Clone, Serialize, Deserialize)]
148pub struct IndexedItem {
149 pub id: String,
150 pub content: String,
151 pub category: ItemCategory,
152 pub keywords: Vec<String>,
153 pub type_signature: Option<String>,
154 pub documentation: String,
155 pub examples: Vec<String>,
156 pub metadata: SearchResultMetadata,
157 pub usage_count: usize,
158 pub popularity_score: f64,
159}
160
161#[derive(Debug, Clone, Serialize, Deserialize)]
162pub struct UsageInfo {
163 pub frequency: usize,
164 pub contexts: Vec<UsageContext>,
165 pub common_patterns: Vec<String>,
166 pub related_functions: Vec<String>,
167}
168
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct UsageContext {
171 pub location: String,
172 pub snippet: String,
173 pub description: String,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct SemanticIndex {
178 pub embeddings: HashMap<String, Vec<f32>>,
179 pub clusters: Vec<SemanticCluster>,
180 pub similarity_matrix: HashMap<String, HashMap<String, f64>>,
181}
182
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct SemanticCluster {
185 pub id: String,
186 pub center: Vec<f32>,
187 pub items: Vec<String>,
188 pub coherence_score: f64,
189}
190
191#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct AutocompleteTrie {
193 pub root: TrieNode,
194 pub suggestions_cache: HashMap<String, Vec<AutocompleteSuggestion>>,
195}
196
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct TrieNode {
199 pub value: Option<char>,
200 pub children: HashMap<char, TrieNode>,
201 pub is_end_of_word: bool,
202 pub completions: Vec<AutocompleteSuggestion>,
203 pub frequency: usize,
204}
205
206#[derive(Debug, Clone, Serialize, Deserialize)]
207pub struct AutocompleteSuggestion {
208 pub text: String,
209 pub category: ItemCategory,
210 pub description: String,
211 pub frequency: usize,
212 pub relevance_score: f64,
213}
214
215pub struct SemanticSearchEngine {
216 config: SearchEngineConfig,
217 index: SearchIndex,
218 query_cache: HashMap<String, Vec<SearchResult>>,
219 performance_metrics: SearchMetrics,
220}
221
222#[derive(Debug, Clone, Serialize, Deserialize)]
223pub struct SearchMetrics {
224 pub total_queries: usize,
225 pub cache_hits: usize,
226 pub average_response_time: f64,
227 pub index_size: usize,
228 pub last_reindex_time: chrono::DateTime<chrono::Utc>,
229}
230
231impl SemanticSearchEngine {
232 pub fn new(config: SearchEngineConfig) -> Self {
233 Self {
234 config,
235 index: SearchIndex::new(),
236 query_cache: HashMap::new(),
237 performance_metrics: SearchMetrics::default(),
238 }
239 }
240
241 pub fn build_index(
242 &mut self,
243 traits: &[TraitInfo],
244 types: &[TypeInfo],
245 examples: &[CodeExample],
246 ) -> Result<(), Box<dyn std::error::Error>> {
247 self.index_traits(traits)?;
248 self.index_types(types)?;
249 self.index_examples(examples)?;
250 self.build_semantic_embeddings()?;
251 self.build_autocomplete_trie()?;
252 self.update_usage_statistics()?;
253 Ok(())
254 }
255
256 fn index_traits(&mut self, traits: &[TraitInfo]) -> Result<(), Box<dyn std::error::Error>> {
257 for trait_info in traits {
258 let item = IndexedItem {
259 id: format!("trait_{}", trait_info.name),
260 content: format!("{} {}", trait_info.name, trait_info.description),
261 category: ItemCategory::Trait,
262 keywords: self.extract_keywords(&trait_info.description),
263 type_signature: Some(self.build_trait_signature(trait_info)),
264 documentation: trait_info.description.clone(),
265 examples: vec![], metadata: SearchResultMetadata {
267 crate_name: "api".to_string(), module_path: trait_info.path.clone(),
269 line_number: None,
270 visibility: Visibility::Public,
271 stability: Stability::Stable,
272 since_version: None, deprecated_since: None,
274 related_items: trait_info.implementations.clone(),
275 },
276 usage_count: 0,
277 popularity_score: 0.0,
278 };
279
280 let item_id = item.id.clone();
281 self.index.items.insert(item_id.clone(), item);
282 self.index_words(&trait_info.name, &item_id);
283 }
284 Ok(())
285 }
286
287 fn index_types(&mut self, types: &[TypeInfo]) -> Result<(), Box<dyn std::error::Error>> {
288 for type_info in types {
289 let item = IndexedItem {
290 id: format!("type_{}", type_info.name),
291 content: format!("{} {}", type_info.name, type_info.description),
292 category: self.determine_type_category(type_info),
293 keywords: self.extract_keywords(&type_info.description),
294 type_signature: Some(format!("{:?}", type_info.kind)), documentation: type_info.description.clone(),
296 examples: vec![], metadata: SearchResultMetadata {
298 crate_name: "api".to_string(), module_path: type_info.path.clone(),
300 line_number: None,
301 visibility: Visibility::Public,
302 stability: Stability::Stable,
303 since_version: None, deprecated_since: None,
305 related_items: type_info.trait_impls.clone(),
306 },
307 usage_count: 0,
308 popularity_score: 0.0,
309 };
310
311 let item_id = item.id.clone();
312 self.index.items.insert(item_id.clone(), item);
313 self.index_words(&type_info.name, &item_id);
314 self.index_type_signature(&format!("{:?}", type_info.kind), &item_id);
315 }
316 Ok(())
317 }
318
319 fn index_examples(
320 &mut self,
321 examples: &[CodeExample],
322 ) -> Result<(), Box<dyn std::error::Error>> {
323 for (idx, example) in examples.iter().enumerate() {
324 let item = IndexedItem {
325 id: format!("example_{}", idx),
326 content: format!("{} {}", example.title, example.code),
327 category: ItemCategory::Function,
328 keywords: self.extract_keywords(&example.description),
329 type_signature: None,
330 documentation: example.description.clone(),
331 examples: vec![example.code.clone()],
332 metadata: SearchResultMetadata {
333 crate_name: "examples".to_string(),
334 module_path: "examples".to_string(),
335 line_number: None,
336 visibility: Visibility::Public,
337 stability: Stability::Stable,
338 since_version: None,
339 deprecated_since: None,
340 related_items: vec![],
341 },
342 usage_count: 0,
343 popularity_score: 0.0,
344 };
345
346 let item_id = item.id.clone();
347 self.index.items.insert(item_id.clone(), item);
348 self.index_words(&example.title, &item_id);
349 self.index_words(&example.description, &item_id);
350 }
351 Ok(())
352 }
353
354 fn build_trait_signature(&self, trait_info: &TraitInfo) -> String {
355 format!(
356 "trait {}{}",
357 trait_info.name,
358 if trait_info.generics.is_empty() {
359 String::new()
360 } else {
361 format!("<{}>", trait_info.generics.join(", "))
362 }
363 )
364 }
365
366 fn determine_type_category(&self, type_info: &TypeInfo) -> ItemCategory {
367 use crate::api_data_structures::TypeKind;
368 match type_info.kind {
369 TypeKind::Struct => ItemCategory::Struct,
370 TypeKind::Enum => ItemCategory::Enum,
371 TypeKind::Union => ItemCategory::Type,
372 TypeKind::TypeAlias => ItemCategory::Type,
373 TypeKind::Trait => ItemCategory::Trait,
374 }
375 }
376
377 fn extract_keywords(&self, text: &str) -> Vec<String> {
378 text.split_whitespace()
379 .map(|word| word.to_lowercase())
380 .filter(|word| word.len() > 2)
381 .collect()
382 }
383
384 fn index_words(&mut self, text: &str, item_id: &str) {
385 for word in text.split_whitespace() {
386 let word = word.to_lowercase();
387 self.index
388 .word_index
389 .entry(word)
390 .or_default()
391 .insert(item_id.to_string());
392 }
393 }
394
395 fn index_type_signature(&mut self, type_sig: &str, item_id: &str) {
396 self.index
397 .type_index
398 .entry(type_sig.to_string())
399 .or_default()
400 .insert(item_id.to_string());
401 }
402
403 fn build_semantic_embeddings(&mut self) -> Result<(), Box<dyn std::error::Error>> {
404 for (item_id, item) in &self.index.items {
405 let embedding = self.compute_embedding(&item.content);
406 self.index
407 .semantic_index
408 .embeddings
409 .insert(item_id.clone(), embedding);
410 }
411 self.build_semantic_clusters()?;
412 Ok(())
413 }
414
415 fn compute_embedding(&self, text: &str) -> Vec<f32> {
416 let words: Vec<&str> = text.split_whitespace().collect();
417 let mut embedding = vec![0.0; 300];
418
419 for (i, word) in words.iter().enumerate().take(300) {
420 embedding[i] = word.len() as f32;
421 }
422
423 embedding
424 }
425
426 fn build_semantic_clusters(&mut self) -> Result<(), Box<dyn std::error::Error>> {
427 let embeddings: Vec<(String, Vec<f32>)> = self
428 .index
429 .semantic_index
430 .embeddings
431 .iter()
432 .map(|(k, v)| (k.clone(), v.clone()))
433 .collect();
434
435 let clusters = self.k_means_clustering(&embeddings, 10)?;
436 self.index.semantic_index.clusters = clusters;
437 Ok(())
438 }
439
440 fn k_means_clustering(
441 &self,
442 embeddings: &[(String, Vec<f32>)],
443 k: usize,
444 ) -> Result<Vec<SemanticCluster>, Box<dyn std::error::Error>> {
445 let mut clusters = Vec::new();
446 let embedding_dim = embeddings.first().map(|(_, e)| e.len()).unwrap_or(300);
447
448 for i in 0..k {
449 clusters.push(SemanticCluster {
450 id: format!("cluster_{}", i),
451 center: vec![0.0; embedding_dim],
452 items: Vec::new(),
453 coherence_score: 0.0,
454 });
455 }
456
457 for (item_id, embedding) in embeddings {
458 let closest_cluster = self.find_closest_cluster(&clusters, embedding);
459 clusters[closest_cluster].items.push(item_id.clone());
460 }
461
462 Ok(clusters)
463 }
464
465 fn find_closest_cluster(&self, clusters: &[SemanticCluster], embedding: &[f32]) -> usize {
466 clusters
467 .iter()
468 .enumerate()
469 .min_by(|(_, a), (_, b)| {
470 let dist_a = self.cosine_distance(&a.center, embedding);
471 let dist_b = self.cosine_distance(&b.center, embedding);
472 dist_a
473 .partial_cmp(&dist_b)
474 .unwrap_or(std::cmp::Ordering::Equal)
475 })
476 .map(|(i, _)| i)
477 .unwrap_or(0)
478 }
479
480 fn cosine_distance(&self, a: &[f32], b: &[f32]) -> f64 {
481 let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
482 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
483 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
484
485 if norm_a == 0.0 || norm_b == 0.0 {
486 1.0
487 } else {
488 1.0 - (dot_product / (norm_a * norm_b)) as f64
489 }
490 }
491
492 fn build_autocomplete_trie(&mut self) -> Result<(), Box<dyn std::error::Error>> {
493 let mut trie = AutocompleteTrie::new();
494
495 for (item_id, item) in &self.index.items {
496 let suggestion = AutocompleteSuggestion {
497 text: item_id.clone(),
498 category: item.category.clone(),
499 description: item.documentation.clone(),
500 frequency: item.usage_count,
501 relevance_score: item.popularity_score,
502 };
503 trie.insert(item_id, suggestion);
504 }
505
506 self.index.autocomplete_trie = trie;
507 Ok(())
508 }
509
510 fn update_usage_statistics(&mut self) -> Result<(), Box<dyn std::error::Error>> {
511 let scores: Vec<(String, f64)> = self
512 .index
513 .items
514 .iter()
515 .map(|(id, item)| (id.clone(), self.calculate_popularity_score(item)))
516 .collect();
517
518 for (item_id, score) in scores {
519 if let Some(item) = self.index.items.get_mut(&item_id) {
520 item.popularity_score = score;
521 }
522 }
523 Ok(())
524 }
525
526 fn calculate_popularity_score(&self, item: &IndexedItem) -> f64 {
527 let base_score = item.usage_count as f64;
528 let documentation_score = if item.documentation.len() > 100 {
529 1.5
530 } else {
531 1.0
532 };
533 let examples_score = if !item.examples.is_empty() { 1.3 } else { 1.0 };
534
535 base_score * documentation_score * examples_score
536 }
537
538 pub fn search(
539 &mut self,
540 query: &SearchQuery,
541 ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
542 let cache_key = self.build_cache_key(query);
543
544 if let Some(cached_results) = self.query_cache.get(&cache_key) {
545 self.performance_metrics.cache_hits += 1;
546 return Ok(cached_results.clone());
547 }
548
549 let results = match query.query_type {
550 SearchQueryType::Semantic => self.semantic_search(query)?,
551 SearchQueryType::TypeSignature => self.type_search(query)?,
552 SearchQueryType::Usage => self.usage_search(query)?,
553 _ => self.general_search(query)?,
554 };
555
556 let filtered_results = self.apply_filters(&results, &query.filters);
557 let ranked_results = self.rank_results(filtered_results, query);
558
559 let final_results: Vec<SearchResult> = ranked_results
560 .into_iter()
561 .take(self.config.max_results)
562 .collect();
563
564 self.query_cache.insert(cache_key, final_results.clone());
565 self.performance_metrics.total_queries += 1;
566
567 Ok(final_results)
568 }
569
570 fn build_cache_key(&self, query: &SearchQuery) -> String {
571 format!("{:?}", query)
572 }
573
574 fn semantic_search(
575 &self,
576 query: &SearchQuery,
577 ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
578 let query_embedding = self.compute_embedding(&query.text);
579 let mut results = Vec::new();
580
581 for (item_id, item_embedding) in &self.index.semantic_index.embeddings {
582 let similarity = 1.0 - self.cosine_distance(&query_embedding, item_embedding);
583
584 if similarity >= self.config.similarity_threshold {
585 if let Some(item) = self.index.items.get(item_id) {
586 results.push(SearchResult {
587 id: item_id.clone(),
588 title: item_id.clone(),
589 description: item.documentation.clone(),
590 category: item.category.clone(),
591 url: format!("/docs/{}", item_id),
592 score: similarity,
593 snippet: self.generate_snippet(&item.content, &query.text),
594 metadata: item.metadata.clone(),
595 });
596 }
597 }
598 }
599
600 Ok(results)
601 }
602
603 fn type_search(
604 &self,
605 query: &SearchQuery,
606 ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
607 let mut results = Vec::new();
608
609 for (type_sig, item_ids) in &self.index.type_index {
610 if type_sig.contains(&query.text) {
611 for item_id in item_ids {
612 if let Some(item) = self.index.items.get(item_id) {
613 results.push(SearchResult {
614 id: item_id.clone(),
615 title: item_id.clone(),
616 description: item.documentation.clone(),
617 category: item.category.clone(),
618 url: format!("/docs/{}", item_id),
619 score: self.calculate_type_match_score(type_sig, &query.text),
620 snippet: item.type_signature.clone(),
621 metadata: item.metadata.clone(),
622 });
623 }
624 }
625 }
626 }
627
628 Ok(results)
629 }
630
631 fn usage_search(
632 &self,
633 query: &SearchQuery,
634 ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
635 let mut results = Vec::new();
636
637 for (pattern, usage_info) in &self.index.usage_index {
638 if pattern.contains(&query.text)
639 || usage_info
640 .common_patterns
641 .iter()
642 .any(|p| p.contains(&query.text))
643 {
644 for context in &usage_info.contexts {
645 results.push(SearchResult {
646 id: format!("usage_{}", pattern),
647 title: format!("Usage: {}", pattern),
648 description: context.description.clone(),
649 category: ItemCategory::Function,
650 url: context.location.clone(),
651 score: usage_info.frequency as f64,
652 snippet: Some(context.snippet.clone()),
653 metadata: SearchResultMetadata {
654 crate_name: "usage".to_string(),
655 module_path: pattern.clone(),
656 line_number: None,
657 visibility: Visibility::Public,
658 stability: Stability::Stable,
659 since_version: None,
660 deprecated_since: None,
661 related_items: usage_info.related_functions.clone(),
662 },
663 });
664 }
665 }
666 }
667
668 Ok(results)
669 }
670
671 fn general_search(
672 &self,
673 query: &SearchQuery,
674 ) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
675 let mut results = Vec::new();
676 let query_words: Vec<String> = query
677 .text
678 .split_whitespace()
679 .map(|w| w.to_lowercase())
680 .collect();
681
682 for word in &query_words {
683 if let Some(item_ids) = self.index.word_index.get(word) {
684 for item_id in item_ids {
685 if let Some(item) = self.index.items.get(item_id) {
686 let score = self.calculate_text_match_score(&item.content, &query.text);
687
688 results.push(SearchResult {
689 id: item_id.clone(),
690 title: item_id.clone(),
691 description: item.documentation.clone(),
692 category: item.category.clone(),
693 url: format!("/docs/{}", item_id),
694 score,
695 snippet: self.generate_snippet(&item.content, &query.text),
696 metadata: item.metadata.clone(),
697 });
698 }
699 }
700 }
701 }
702
703 Ok(results)
704 }
705
706 fn calculate_type_match_score(&self, type_sig: &str, query: &str) -> f64 {
707 if type_sig == query {
708 1.0
709 } else if type_sig.contains(query) {
710 0.8
711 } else {
712 0.3
713 }
714 }
715
716 fn calculate_text_match_score(&self, content: &str, query: &str) -> f64 {
717 let content_lower = content.to_lowercase();
718 let query_lower = query.to_lowercase();
719
720 if content_lower.contains(&query_lower) {
721 let exact_matches = content_lower.matches(&query_lower).count();
722 let word_count = content.split_whitespace().count();
723 (exact_matches as f64) / (word_count as f64).max(1.0)
724 } else {
725 0.1
726 }
727 }
728
729 fn generate_snippet(&self, content: &str, query: &str) -> Option<String> {
730 let query_lower = query.to_lowercase();
731 let content_lower = content.to_lowercase();
732
733 if let Some(pos) = content_lower.find(&query_lower) {
734 let start = pos.saturating_sub(50);
735 let end = (pos + query.len() + 50).min(content.len());
736 Some(content[start..end].to_string())
737 } else {
738 Some(content.chars().take(100).collect())
739 }
740 }
741
742 fn apply_filters(
743 &self,
744 results: &[SearchResult],
745 filters: &SearchFilters,
746 ) -> Vec<SearchResult> {
747 results
748 .iter()
749 .filter(|result| {
750 if !filters.categories.is_empty() && !filters.categories.contains(&result.category)
751 {
752 return false;
753 }
754 if !filters.crates.is_empty()
755 && !filters.crates.contains(&result.metadata.crate_name)
756 {
757 return false;
758 }
759 true
760 })
761 .cloned()
762 .collect()
763 }
764
765 fn rank_results(
766 &self,
767 mut results: Vec<SearchResult>,
768 _query: &SearchQuery,
769 ) -> Vec<SearchResult> {
770 results.sort_by(|a, b| {
771 let score_cmp = b
772 .score
773 .partial_cmp(&a.score)
774 .unwrap_or(std::cmp::Ordering::Equal);
775 if score_cmp != std::cmp::Ordering::Equal {
776 return score_cmp;
777 }
778
779 a.title.cmp(&b.title)
780 });
781
782 results
783 }
784
785 pub fn get_autocomplete_suggestions(
786 &self,
787 prefix: &str,
788 limit: usize,
789 ) -> Vec<AutocompleteSuggestion> {
790 self.index.autocomplete_trie.get_suggestions(prefix, limit)
791 }
792
793 pub fn get_search_metrics(&self) -> &SearchMetrics {
794 &self.performance_metrics
795 }
796}
797
798impl Default for SearchIndex {
799 fn default() -> Self {
800 Self::new()
801 }
802}
803
804impl SearchIndex {
805 pub fn new() -> Self {
806 Self {
807 items: HashMap::new(),
808 word_index: HashMap::new(),
809 type_index: HashMap::new(),
810 usage_index: HashMap::new(),
811 semantic_index: SemanticIndex::new(),
812 autocomplete_trie: AutocompleteTrie::new(),
813 last_updated: chrono::Utc::now(),
814 }
815 }
816}
817
818impl Default for SemanticIndex {
819 fn default() -> Self {
820 Self::new()
821 }
822}
823
824impl SemanticIndex {
825 pub fn new() -> Self {
826 Self {
827 embeddings: HashMap::new(),
828 clusters: Vec::new(),
829 similarity_matrix: HashMap::new(),
830 }
831 }
832}
833
834impl Default for AutocompleteTrie {
835 fn default() -> Self {
836 Self::new()
837 }
838}
839
840impl AutocompleteTrie {
841 pub fn new() -> Self {
842 Self {
843 root: TrieNode::new(),
844 suggestions_cache: HashMap::new(),
845 }
846 }
847
848 pub fn insert(&mut self, word: &str, suggestion: AutocompleteSuggestion) {
849 let mut current = &mut self.root;
850
851 for ch in word.chars() {
852 current = current.children.entry(ch).or_default();
853 }
854
855 current.is_end_of_word = true;
856 current.completions.push(suggestion);
857 current.frequency += 1;
858 }
859
860 pub fn get_suggestions(&self, prefix: &str, limit: usize) -> Vec<AutocompleteSuggestion> {
861 if let Some(cached) = self.suggestions_cache.get(prefix) {
862 return cached.iter().take(limit).cloned().collect();
863 }
864
865 let mut current = &self.root;
866
867 for ch in prefix.chars() {
868 if let Some(child) = current.children.get(&ch) {
869 current = child;
870 } else {
871 return Vec::new();
872 }
873 }
874
875 let mut suggestions = Vec::new();
876 self.collect_suggestions(current, &mut suggestions);
877
878 suggestions.sort_by(|a, b| {
879 b.relevance_score
880 .partial_cmp(&a.relevance_score)
881 .unwrap_or(std::cmp::Ordering::Equal)
882 .then_with(|| b.frequency.cmp(&a.frequency))
883 });
884
885 suggestions.into_iter().take(limit).collect()
886 }
887
888 #[allow(clippy::only_used_in_recursion)]
889 fn collect_suggestions(&self, node: &TrieNode, suggestions: &mut Vec<AutocompleteSuggestion>) {
890 if node.is_end_of_word {
891 suggestions.extend(node.completions.iter().cloned());
892 }
893
894 for child in node.children.values() {
895 self.collect_suggestions(child, suggestions);
896 }
897 }
898}
899
900impl Default for TrieNode {
901 fn default() -> Self {
902 Self::new()
903 }
904}
905
906impl TrieNode {
907 pub fn new() -> Self {
908 Self {
909 value: None,
910 children: HashMap::new(),
911 is_end_of_word: false,
912 completions: Vec::new(),
913 frequency: 0,
914 }
915 }
916}
917
918impl Default for SearchMetrics {
919 fn default() -> Self {
920 Self {
921 total_queries: 0,
922 cache_hits: 0,
923 average_response_time: 0.0,
924 index_size: 0,
925 last_reindex_time: chrono::Utc::now(),
926 }
927 }
928}
929
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an engine with the default configuration.
    fn default_engine() -> SemanticSearchEngine {
        SemanticSearchEngine::new(SearchEngineConfig::default())
    }

    /// The engine keeps the configuration it was constructed with.
    #[test]
    fn test_search_engine_creation() {
        let engine = default_engine();
        assert_eq!(engine.config.max_results, 50);
    }

    /// A word inserted into the trie is found through one of its prefixes.
    #[test]
    fn test_autocomplete_trie() {
        let mut trie = AutocompleteTrie::new();
        trie.insert(
            "test",
            AutocompleteSuggestion {
                text: "test".to_string(),
                category: ItemCategory::Function,
                description: "Test function".to_string(),
                frequency: 1,
                relevance_score: 1.0,
            },
        );

        let suggestions = trie.get_suggestions("te", 10);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].text, "test");
    }

    /// A query keeps the text and the type it was built with.
    #[test]
    fn test_search_query_creation() {
        let query = SearchQuery {
            text: "linear regression".to_string(),
            query_type: SearchQueryType::Semantic,
            filters: SearchFilters::default(),
            options: SearchOptions::default(),
        };

        assert_eq!(query.text, "linear regression");
        assert!(matches!(query.query_type, SearchQueryType::Semantic));
    }

    /// Orthogonal vectors have a cosine distance of 1.
    #[test]
    fn test_cosine_distance() {
        let engine = default_engine();
        let x_axis = [1.0_f32, 0.0, 0.0];
        let y_axis = [0.0_f32, 1.0, 0.0];

        let distance = engine.cosine_distance(&x_axis, &y_axis);
        assert!((distance - 1.0).abs() < 1e-6);
    }

    /// Embeddings always have 300 dimensions.
    #[test]
    fn test_embedding_computation() {
        let embedding = default_engine().compute_embedding("test string");
        assert_eq!(embedding.len(), 300);
    }
}