Skip to main content

oxirs_vec/
personalized_search.rs

1//! Personalized vector search with user-specific embeddings and preferences
2//!
3//! This module provides personalized search capabilities that adapt to individual
4//! user behavior, preferences, and interaction history. It maintains user-specific
5//! embeddings that evolve over time based on feedback signals.
6//!
7//! # Features
8//!
9//! - **User embeddings**: Learn and maintain personalized user representations
10//! - **Collaborative filtering**: Leverage behavior of similar users
11//! - **Contextual bandits**: Balance exploration vs exploitation
12//! - **Preference learning**: Adapt to explicit and implicit feedback
13//! - **Privacy-aware**: Support for federated and differential privacy
14//! - **Real-time adaptation**: Update user models with each interaction
15//!
16//! # Example
17//!
18//! ```rust,no_run
19//! use oxirs_vec::personalized_search::{PersonalizedSearchEngine, UserFeedback, FeedbackType};
20//!
21//! // Create personalized search engine
22//! let mut engine = PersonalizedSearchEngine::new_default()?;
23//!
24//! // Register user
25//! engine.register_user("user123", None)?;
26//!
27//! // Search with personalization
28//! let results = engine.personalized_search("user123", "machine learning", 10)?;
29//!
30//! // Provide feedback
31//! engine.record_feedback(UserFeedback {
32//!     user_id: "user123".to_string(),
33//!     item_id: results[0].id.clone(),
34//!     feedback_type: FeedbackType::Click,
35//!     score: 1.0,
36//!     timestamp: std::time::SystemTime::now(),
37//!     metadata: Default::default(),
38//! })?;
39//! # Ok::<(), anyhow::Error>(())
40//! ```
41
42use crate::Vector;
43use crate::VectorStore;
44use anyhow::{anyhow, Result};
45use parking_lot::RwLock;
46use scirs2_core::random::RngCore;
47use serde::{Deserialize, Serialize};
48use std::collections::HashMap;
49use std::sync::Arc;
50use std::time::{Duration, SystemTime};
51
/// Type alias for the similarity matrix between users.
///
/// The matrix is shared (`Arc`) and lock-guarded (`RwLock`). The inner `Option`
/// is `None` when the engine is constructed; the map, once present, associates a
/// pair of string keys with an `f32` score.
// NOTE(review): the key pair is presumably `(user_id, other_user_id)` — confirm
// against whichever code populates the map.
type SimilarityMatrix = Arc<RwLock<Option<HashMap<(String, String), f32>>>>;
54
/// Personalized search engine that maintains user-specific models.
///
/// All mutable state lives behind `Arc<RwLock<..>>`, so read-only operations
/// such as searching only need `&self`.
pub struct PersonalizedSearchEngine {
    /// Tuning parameters supplied at construction time.
    config: PersonalizationConfig,
    /// Store queried for the content-based candidate list.
    vector_store: Arc<RwLock<VectorStore>>,
    /// Registered users, keyed by user id.
    user_profiles: Arc<RwLock<HashMap<String, UserProfile>>>,
    /// Known items, looked up by item id.
    item_profiles: Arc<RwLock<HashMap<String, ItemProfile>>>,
    /// Log of every interaction recorded through feedback, in arrival order.
    interaction_history: Arc<RwLock<Vec<UserInteraction>>>,
    /// Pairwise user similarity cache; starts out as `None`.
    similarity_matrix: SimilarityMatrix,
}
64
/// Configuration for personalized search.
///
/// The final score of a personalized result is
/// `content_weight * content + collaborative_weight * collaborative
///  + (1 - content_weight - collaborative_weight) * personalization + exploration`,
/// so the two weights together decide how much room is left for the
/// embedding-based personalization term.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonalizationConfig {
    /// Dimension of user embeddings (length of the vector created at registration)
    pub user_embedding_dim: usize,
    /// Learning rate for user embedding updates
    pub learning_rate: f32,
    /// Decay factor for older interactions
    // NOTE(review): not referenced by the code visible in this part of the file.
    pub time_decay_factor: f32,
    /// Weight for collaborative filtering
    pub collaborative_weight: f32,
    /// Weight for content-based filtering
    pub content_weight: f32,
    /// Enable contextual bandits (adds an exploration bonus to personalized scores)
    pub enable_bandits: bool,
    /// Exploration rate for bandits (scales the exploration bonus)
    pub exploration_rate: f32,
    /// Enable differential privacy
    // NOTE(review): not referenced by the code visible in this part of the file.
    pub enable_privacy: bool,
    /// Privacy epsilon parameter
    // NOTE(review): not referenced by the code visible in this part of the file.
    pub privacy_epsilon: f32,
    /// Minimum interactions before personalization; users below this count are
    /// served by the cold start strategy instead
    pub min_interactions: usize,
    /// User similarity threshold; only users whose cosine similarity reaches this
    /// value are kept as "similar users"
    pub user_similarity_threshold: f32,
    /// Enable real-time updates (update the user profile on every recorded feedback)
    pub enable_realtime_updates: bool,
    /// Cold start strategy
    pub cold_start_strategy: ColdStartStrategy,
}
95
impl Default for PersonalizationConfig {
    /// Default tuning: 128-dimensional embeddings, bandit exploration enabled,
    /// privacy disabled, real-time updates enabled, popularity-based cold start.
    fn default() -> Self {
        Self {
            user_embedding_dim: 128,
            learning_rate: 0.01,
            time_decay_factor: 0.95,
            // The two weights below sum to 1.0, so the residual weight given to
            // the embedding-based personalization score is effectively zero
            // with these defaults.
            collaborative_weight: 0.4,
            content_weight: 0.6,
            enable_bandits: true,
            exploration_rate: 0.1,
            enable_privacy: false,
            privacy_epsilon: 1.0,
            // Users with fewer than 5 interactions go through the cold start path.
            min_interactions: 5,
            user_similarity_threshold: 0.7,
            enable_realtime_updates: true,
            cold_start_strategy: ColdStartStrategy::PopularityBased,
        }
    }
}
115
/// Strategy for handling new users (cold start problem).
///
/// Applied to the content-based candidates of any user whose interaction count
/// is still below `PersonalizationConfig::min_interactions`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ColdStartStrategy {
    /// Use global popularity: boosts each known item by `popularity_score * 0.3`
    PopularityBased,
    /// Use demographic information (currently simplified: candidates are only
    /// re-sorted by their existing score)
    DemographicBased,
    /// Use random exploration: adds a random amount of up to 0.2 to every score
    RandomExploration,
    /// Use hybrid approach: for known items, adds `popularity_score * 0.2` plus a
    /// random amount of up to 0.1
    Hybrid,
}
128
/// User profile containing personalized embedding and preferences.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserProfile {
    /// Identifier the profile is stored under.
    pub user_id: String,
    /// Learned user vector; re-normalized to unit length after every update.
    pub embedding: Vec<f32>,
    /// Free-form preference weights; created empty at registration.
    pub preferences: HashMap<String, f32>,
    /// Number of interactions applied to this profile so far.
    pub interaction_count: usize,
    /// Time of registration or of the most recent profile update.
    pub last_updated: SystemTime,
    /// Optional demographics supplied at registration.
    pub demographics: Option<UserDemographics>,
    /// Most similar other users as `(user_id, similarity)` pairs.
    pub similar_users: Vec<(String, f32)>,
    /// Per-category affinity, maintained as an exponential moving average of
    /// interaction scores.
    pub favorite_categories: HashMap<String, f32>,
    /// Disliked items: ids of items that received a negative interaction score.
    pub negative_items: Vec<String>,
}
142
/// User demographic information.
///
/// Only `interests` is consumed by the code visible in this part of the file
/// (it seeds the initial user embedding); the other fields are carried along
/// on the profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserDemographics {
    /// Optional age bracket label.
    pub age_group: Option<String>,
    /// Optional location label.
    pub location: Option<String>,
    /// Optional language label.
    pub language: Option<String>,
    /// Interest keywords; each one is hashed to a slot of the initial embedding.
    pub interests: Vec<String>,
}
151
/// Item profile with popularity and category information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ItemProfile {
    /// Identifier of the item.
    pub item_id: String,
    /// Item vector compared against user embeddings via cosine similarity.
    pub embedding: Vec<f32>,
    /// Decayed popularity: each interaction blends in 5% of its score.
    pub popularity_score: f32,
    /// Categories used for the user's category-preference boost.
    pub categories: Vec<String>,
    /// Number of interactions recorded for this item.
    pub interaction_count: usize,
    /// Running mean of all interaction scores received by the item.
    pub average_rating: f32,
    /// Time of the most recent interaction with the item.
    pub last_accessed: SystemTime,
}
163
/// User interaction record, as stored in the engine's interaction history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserInteraction {
    /// User who performed the interaction.
    pub user_id: String,
    /// Item the interaction refers to.
    pub item_id: String,
    /// Kind of interaction, derived from the feedback type.
    pub interaction_type: InteractionType,
    /// Signed strength of the signal; positive scores count as endorsements,
    /// negative scores mark the item as disliked.
    pub score: f32,
    /// When the interaction happened (copied from the feedback).
    pub timestamp: SystemTime,
    /// Arbitrary key/value context (copied from the feedback metadata).
    pub context: HashMap<String, String>,
}
174
/// Type of user interaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InteractionType {
    /// The item was viewed.
    View,
    /// The item was clicked.
    Click,
    /// The item was liked.
    Like,
    /// The item was disliked.
    Dislike,
    /// The item was shared.
    Share,
    /// The item was purchased.
    Purchase,
    /// Explicit rating value (produced from `FeedbackType::Explicit`).
    Rating(f32),
    /// Time spent on the item (long-dwell and quick-bounce feedback map here).
    DwellTime(Duration),
    /// Any other named interaction (skip feedback is stored as `"skip"`).
    Custom(String),
}
188
/// User feedback for model updates.
///
/// The `score` field, not the `feedback_type`, is what drives the numeric
/// profile updates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserFeedback {
    /// User giving the feedback.
    pub user_id: String,
    /// Item the feedback is about.
    pub item_id: String,
    /// Kind of signal; converted to an `InteractionType` when recorded.
    pub feedback_type: FeedbackType,
    /// Signed strength of the signal; a negative value marks the item as disliked.
    pub score: f32,
    /// When the feedback was produced.
    pub timestamp: SystemTime,
    /// Arbitrary key/value data, stored as the interaction's context.
    pub metadata: HashMap<String, String>,
}
199
/// Type of feedback signal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FeedbackType {
    /// Rating
    Explicit(f32),
    /// Binary positive signal
    Click,
    /// Implicit interest
    View,
    /// Negative signal
    Skip,
    /// Strong positive signal
    Purchase,
    /// Strong positive signal
    Share,
    /// Time-based positive
    LongDwell,
    /// Time-based negative
    QuickBounce,
    /// Application-defined signal identified by name
    Custom(String),
}
213
/// Personalized search result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonalizedResult {
    /// Identifier of the matched item.
    pub id: String,
    /// Final ranking score (equals `content_score` until re-ranking adjusts it).
    pub score: f32,
    /// Score from comparing the user embedding with the item embedding.
    pub personalization_score: f32,
    /// Raw similarity score returned by the vector store.
    pub content_score: f32,
    /// Score derived from similar users' interactions and item popularity.
    pub collaborative_score: f32,
    /// Bonus added to encourage exploration when bandits are enabled.
    pub exploration_bonus: f32,
    /// Key/value metadata; the `"category"` key is consulted when diversifying.
    pub metadata: HashMap<String, String>,
    /// Human-readable reason for the recommendation, when one was generated.
    pub explanation: Option<String>,
}
226
227impl PersonalizedSearchEngine {
228    /// Create a new personalized search engine with default configuration
229    pub fn new_default() -> Result<Self> {
230        Self::new(PersonalizationConfig::default(), None)
231    }
232
233    /// Create a new personalized search engine with custom configuration
234    pub fn new(config: PersonalizationConfig, vector_store: Option<VectorStore>) -> Result<Self> {
235        let default_store = VectorStore::new();
236        let vector_store = Arc::new(RwLock::new(vector_store.unwrap_or(default_store)));
237
238        Ok(Self {
239            config,
240            vector_store,
241            user_profiles: Arc::new(RwLock::new(HashMap::new())),
242            item_profiles: Arc::new(RwLock::new(HashMap::new())),
243            interaction_history: Arc::new(RwLock::new(Vec::new())),
244            similarity_matrix: Arc::new(RwLock::new(None)),
245        })
246    }
247
248    /// Register a new user
249    pub fn register_user(
250        &mut self,
251        user_id: impl Into<String>,
252        demographics: Option<UserDemographics>,
253    ) -> Result<()> {
254        let user_id = user_id.into();
255
256        // Initialize user embedding
257        let embedding = self.initialize_user_embedding(&user_id, demographics.as_ref())?;
258
259        let profile = UserProfile {
260            user_id: user_id.clone(),
261            embedding,
262            preferences: HashMap::new(),
263            interaction_count: 0,
264            last_updated: SystemTime::now(),
265            demographics,
266            similar_users: Vec::new(),
267            favorite_categories: HashMap::new(),
268            negative_items: Vec::new(),
269        };
270
271        self.user_profiles.write().insert(user_id, profile);
272
273        Ok(())
274    }
275
276    /// Perform personalized search for a user
277    pub fn personalized_search(
278        &self,
279        user_id: impl Into<String>,
280        query: impl Into<String>,
281        k: usize,
282    ) -> Result<Vec<PersonalizedResult>> {
283        let user_id = user_id.into();
284        let query = query.into();
285
286        // Get user profile
287        let user_profiles = self.user_profiles.read();
288        let user_profile = user_profiles
289            .get(&user_id)
290            .ok_or_else(|| anyhow!("User not found: {}", user_id))?;
291
292        // Check if user has enough interactions for personalization
293        let use_personalization = user_profile.interaction_count >= self.config.min_interactions;
294
295        // Get base search results (content-based)
296        let base_results = self.content_based_search(&query, k * 3)?;
297
298        // Apply personalization
299        let personalized_results = if use_personalization {
300            self.apply_personalization(&user_id, base_results, k)?
301        } else {
302            self.apply_cold_start_strategy(&user_id, base_results, k)?
303        };
304
305        Ok(personalized_results)
306    }
307
308    /// Content-based search without personalization
309    fn content_based_search(&self, query: &str, k: usize) -> Result<Vec<PersonalizedResult>> {
310        // Simple text embedding (in production, use proper embedding model)
311        let _query_embedding = self.create_query_embedding(query)?;
312
313        // Search in vector store using text query
314        let store = self.vector_store.read();
315        let results = store.similarity_search(query, k)?;
316
317        // Convert to PersonalizedResult
318        Ok(results
319            .into_iter()
320            .map(|(id, score)| PersonalizedResult {
321                id,
322                score,
323                personalization_score: 0.0,
324                content_score: score,
325                collaborative_score: 0.0,
326                exploration_bonus: 0.0,
327                metadata: HashMap::new(),
328                explanation: None,
329            })
330            .collect())
331    }
332
333    /// Apply personalization to search results
334    fn apply_personalization(
335        &self,
336        user_id: &str,
337        mut results: Vec<PersonalizedResult>,
338        k: usize,
339    ) -> Result<Vec<PersonalizedResult>> {
340        let user_profiles = self.user_profiles.read();
341        let user_profile = user_profiles
342            .get(user_id)
343            .ok_or_else(|| anyhow!("User not found"))?;
344
345        // Compute collaborative filtering scores
346        for result in &mut results {
347            // Collaborative score based on similar users
348            let collab_score = self.compute_collaborative_score(user_profile, &result.id)?;
349
350            // Personalization score based on user embedding
351            let personal_score = self.compute_personalization_score(user_profile, &result.id)?;
352
353            // Exploration bonus (contextual bandits)
354            let exploration_bonus = if self.config.enable_bandits {
355                self.compute_exploration_bonus(user_profile, &result.id)?
356            } else {
357                0.0
358            };
359
360            // Combine scores
361            result.collaborative_score = collab_score;
362            result.personalization_score = personal_score;
363            result.exploration_bonus = exploration_bonus;
364
365            result.score = self.config.content_weight * result.content_score
366                + self.config.collaborative_weight * collab_score
367                + (1.0 - self.config.content_weight - self.config.collaborative_weight)
368                    * personal_score
369                + exploration_bonus;
370
371            // Generate explanation
372            result.explanation = Some(self.generate_explanation(result));
373        }
374
375        // Re-rank by combined score
376        results.sort_by(|a, b| {
377            b.score
378                .partial_cmp(&a.score)
379                .unwrap_or(std::cmp::Ordering::Equal)
380        });
381
382        // Apply diversity
383        let diversified = self.apply_diversity(&results, k)?;
384
385        Ok(diversified)
386    }
387
388    /// Compute collaborative filtering score
389    fn compute_collaborative_score(
390        &self,
391        user_profile: &UserProfile,
392        item_id: &str,
393    ) -> Result<f32> {
394        let item_profiles = self.item_profiles.read();
395
396        if let Some(item_profile) = item_profiles.get(item_id) {
397            // Score based on similar users' interactions
398            let mut collab_score = 0.0;
399            let mut total_weight = 0.0;
400
401            for (similar_user_id, similarity) in &user_profile.similar_users {
402                // Check if similar user interacted with this item
403                let interactions = self.interaction_history.read();
404                let user_interacted = interactions.iter().any(|i| {
405                    &i.user_id == similar_user_id && i.item_id == item_id && i.score > 0.0
406                });
407
408                if user_interacted {
409                    collab_score += similarity;
410                    total_weight += similarity;
411                }
412            }
413
414            if total_weight > 0.0 {
415                collab_score /= total_weight;
416            }
417
418            // Add popularity bonus
419            collab_score += item_profile.popularity_score * 0.1;
420
421            Ok(collab_score.min(1.0))
422        } else {
423            Ok(0.0)
424        }
425    }
426
427    /// Compute personalization score based on user embedding
428    fn compute_personalization_score(
429        &self,
430        user_profile: &UserProfile,
431        item_id: &str,
432    ) -> Result<f32> {
433        let item_profiles = self.item_profiles.read();
434
435        if let Some(item_profile) = item_profiles.get(item_id) {
436            // Compute cosine similarity between user and item embeddings
437            let similarity =
438                self.cosine_similarity(&user_profile.embedding, &item_profile.embedding);
439
440            // Check negative items
441            if user_profile.negative_items.contains(&item_id.to_string()) {
442                return Ok(similarity * 0.5); // Penalize disliked items
443            }
444
445            // Boost based on category preferences
446            let category_boost = item_profile
447                .categories
448                .iter()
449                .filter_map(|cat| user_profile.favorite_categories.get(cat))
450                .sum::<f32>()
451                / item_profile.categories.len().max(1) as f32;
452
453            Ok((similarity + category_boost * 0.3).min(1.0))
454        } else {
455            Ok(0.0)
456        }
457    }
458
459    /// Compute exploration bonus using contextual bandits
460    fn compute_exploration_bonus(&self, user_profile: &UserProfile, item_id: &str) -> Result<f32> {
461        let item_profiles = self.item_profiles.read();
462
463        if let Some(item_profile) = item_profiles.get(item_id) {
464            // UCB-style exploration bonus
465            let n = user_profile.interaction_count as f32;
466            let n_i = item_profile.interaction_count as f32;
467
468            if n_i == 0.0 {
469                // High exploration bonus for unseen items
470                return Ok(self.config.exploration_rate);
471            }
472
473            let exploration_bonus = self.config.exploration_rate * ((2.0 * n.ln() / n_i).sqrt());
474
475            Ok(exploration_bonus.min(0.5))
476        } else {
477            Ok(0.0)
478        }
479    }
480
481    /// Apply cold start strategy for new users
482    fn apply_cold_start_strategy(
483        &self,
484        _user_id: &str,
485        mut results: Vec<PersonalizedResult>,
486        k: usize,
487    ) -> Result<Vec<PersonalizedResult>> {
488        match self.config.cold_start_strategy {
489            ColdStartStrategy::PopularityBased => {
490                // Boost popular items
491                let item_profiles = self.item_profiles.read();
492
493                for result in &mut results {
494                    if let Some(item_profile) = item_profiles.get(&result.id) {
495                        result.score += item_profile.popularity_score * 0.3;
496                    }
497                }
498
499                results.sort_by(|a, b| {
500                    b.score
501                        .partial_cmp(&a.score)
502                        .unwrap_or(std::cmp::Ordering::Equal)
503                });
504            }
505            ColdStartStrategy::RandomExploration => {
506                // Add random exploration
507                use scirs2_core::random::rng;
508                let mut rng_instance = rng();
509
510                for result in &mut results {
511                    // Generate random value between 0.0 and 0.2
512                    let random_val = (rng_instance.next_u64() as f32 / u64::MAX as f32) * 0.2;
513                    result.score += random_val;
514                }
515
516                results.sort_by(|a, b| {
517                    b.score
518                        .partial_cmp(&a.score)
519                        .unwrap_or(std::cmp::Ordering::Equal)
520                });
521            }
522            ColdStartStrategy::DemographicBased => {
523                // Use demographic-based recommendations (simplified)
524                results.sort_by(|a, b| {
525                    b.score
526                        .partial_cmp(&a.score)
527                        .unwrap_or(std::cmp::Ordering::Equal)
528                });
529            }
530            ColdStartStrategy::Hybrid => {
531                // Combine multiple strategies
532                use scirs2_core::random::rng;
533                let item_profiles = self.item_profiles.read();
534                let mut rng_instance = rng();
535
536                for result in &mut results {
537                    if let Some(item_profile) = item_profiles.get(&result.id) {
538                        let random_val = (rng_instance.next_u64() as f32 / u64::MAX as f32) * 0.1;
539                        result.score += item_profile.popularity_score * 0.2 + random_val;
540                    }
541                }
542
543                results.sort_by(|a, b| {
544                    b.score
545                        .partial_cmp(&a.score)
546                        .unwrap_or(std::cmp::Ordering::Equal)
547                });
548            }
549        }
550
551        Ok(results.into_iter().take(k).collect())
552    }
553
554    /// Record user feedback and update user profile
555    pub fn record_feedback(&mut self, feedback: UserFeedback) -> Result<()> {
556        // Convert feedback to interaction
557        let interaction = UserInteraction {
558            user_id: feedback.user_id.clone(),
559            item_id: feedback.item_id.clone(),
560            interaction_type: Self::feedback_to_interaction_type(&feedback.feedback_type),
561            score: feedback.score,
562            timestamp: feedback.timestamp,
563            context: feedback.metadata.clone(),
564        };
565
566        // Store interaction
567        self.interaction_history.write().push(interaction.clone());
568
569        // Update user profile if real-time updates enabled
570        if self.config.enable_realtime_updates {
571            self.update_user_profile(&feedback.user_id, &interaction)?;
572        }
573
574        // Update item profile
575        self.update_item_profile(&feedback.item_id, &interaction)?;
576
577        Ok(())
578    }
579
580    /// Update user profile based on interaction
581    fn update_user_profile(&mut self, user_id: &str, interaction: &UserInteraction) -> Result<()> {
582        let mut user_profiles = self.user_profiles.write();
583
584        if let Some(profile) = user_profiles.get_mut(user_id) {
585            // Update interaction count
586            profile.interaction_count += 1;
587            profile.last_updated = SystemTime::now();
588
589            // Get item embedding
590            let item_profiles = self.item_profiles.read();
591            if let Some(item_profile) = item_profiles.get(&interaction.item_id) {
592                // Update user embedding using gradient descent
593                let learning_rate = self.config.learning_rate;
594
595                for (i, emb_val) in profile.embedding.iter_mut().enumerate() {
596                    if i < item_profile.embedding.len() {
597                        let target = item_profile.embedding[i];
598                        let gradient = (target - *emb_val) * interaction.score;
599                        *emb_val += learning_rate * gradient;
600                    }
601                }
602
603                // Normalize embedding
604                let norm: f32 = profile.embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
605                if norm > 0.0 {
606                    profile.embedding.iter_mut().for_each(|x| *x /= norm);
607                }
608
609                // Update category preferences
610                for category in &item_profile.categories {
611                    let current = profile
612                        .favorite_categories
613                        .get(category)
614                        .copied()
615                        .unwrap_or(0.0);
616                    let updated = current * 0.9 + interaction.score * 0.1;
617                    profile
618                        .favorite_categories
619                        .insert(category.clone(), updated);
620                }
621
622                // Update negative items
623                if interaction.score < 0.0 {
624                    profile.negative_items.push(interaction.item_id.clone());
625                }
626            }
627        }
628
629        Ok(())
630    }
631
632    /// Update item profile based on interaction
633    fn update_item_profile(&mut self, item_id: &str, interaction: &UserInteraction) -> Result<()> {
634        let mut item_profiles = self.item_profiles.write();
635
636        if let Some(profile) = item_profiles.get_mut(item_id) {
637            profile.interaction_count += 1;
638            profile.last_accessed = SystemTime::now();
639
640            // Update average rating
641            let old_avg = profile.average_rating;
642            let count = profile.interaction_count as f32;
643            profile.average_rating = (old_avg * (count - 1.0) + interaction.score) / count;
644
645            // Update popularity score (decayed)
646            profile.popularity_score = profile.popularity_score * 0.95 + interaction.score * 0.05;
647        }
648
649        Ok(())
650    }
651
652    /// Update user similarity matrix
653    pub fn update_user_similarities(&mut self) -> Result<()> {
654        let user_profiles = self.user_profiles.read();
655        let user_ids: Vec<String> = user_profiles.keys().cloned().collect();
656
657        for user_id in &user_ids {
658            if let Some(user_profile) = user_profiles.get(user_id) {
659                let mut similar_users = Vec::new();
660
661                // Compute similarities with all other users
662                for other_id in &user_ids {
663                    if other_id != user_id {
664                        if let Some(other_profile) = user_profiles.get(other_id) {
665                            let similarity = self.cosine_similarity(
666                                &user_profile.embedding,
667                                &other_profile.embedding,
668                            );
669
670                            if similarity >= self.config.user_similarity_threshold {
671                                similar_users.push((other_id.clone(), similarity));
672                            }
673                        }
674                    }
675                }
676
677                // Sort by similarity and keep top 10
678                similar_users
679                    .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
680                similar_users.truncate(10);
681
682                // Update user profile (need to drop read lock and acquire write lock)
683                drop(user_profiles);
684                let mut user_profiles = self.user_profiles.write();
685                if let Some(profile) = user_profiles.get_mut(user_id) {
686                    profile.similar_users = similar_users;
687                }
688
689                return Ok(()); // Early return to avoid deadlock
690            }
691        }
692
693        Ok(())
694    }
695
696    /// Apply diversity to results
697    fn apply_diversity(
698        &self,
699        results: &[PersonalizedResult],
700        k: usize,
701    ) -> Result<Vec<PersonalizedResult>> {
702        // MMR-style diversity
703        let mut diversified = Vec::new();
704        let mut remaining: Vec<PersonalizedResult> = results.to_vec();
705
706        if !remaining.is_empty() {
707            // Add highest scored item first
708            diversified.push(remaining.remove(0));
709        }
710
711        let lambda = 0.7; // Relevance vs diversity trade-off
712
713        while diversified.len() < k && !remaining.is_empty() {
714            let mut best_idx = 0;
715            let mut best_score = f32::NEG_INFINITY;
716
717            for (i, candidate) in remaining.iter().enumerate() {
718                // Compute minimum similarity to already selected items
719                let mut min_similarity = 1.0f32;
720
721                for selected in &diversified {
722                    let similarity = if selected.metadata.get("category")
723                        == candidate.metadata.get("category")
724                    {
725                        0.8
726                    } else {
727                        0.2
728                    };
729
730                    min_similarity = min_similarity.min(similarity);
731                }
732
733                // MMR score
734                let mmr_score = lambda * candidate.score + (1.0 - lambda) * (1.0 - min_similarity);
735
736                if mmr_score > best_score {
737                    best_score = mmr_score;
738                    best_idx = i;
739                }
740            }
741
742            diversified.push(remaining.remove(best_idx));
743        }
744
745        Ok(diversified)
746    }
747
748    /// Generate explanation for personalized result
749    fn generate_explanation(&self, result: &PersonalizedResult) -> String {
750        let mut reasons = Vec::new();
751
752        if result.personalization_score > 0.5 {
753            reasons.push("matches your interests");
754        }
755
756        if result.collaborative_score > 0.5 {
757            reasons.push("liked by similar users");
758        }
759
760        if result.exploration_bonus > 0.1 {
761            reasons.push("new discovery");
762        }
763
764        if reasons.is_empty() {
765            reasons.push("relevant to your query");
766        }
767
768        format!("Recommended because: {}", reasons.join(", "))
769    }
770
771    /// Initialize user embedding
772    fn initialize_user_embedding(
773        &self,
774        _user_id: &str,
775        demographics: Option<&UserDemographics>,
776    ) -> Result<Vec<f32>> {
777        use scirs2_core::random::rng;
778        let mut embedding = vec![0.0f32; self.config.user_embedding_dim];
779
780        if let Some(demo) = demographics {
781            // Use demographics to seed embedding
782            for (_i, interest) in demo.interests.iter().enumerate().take(embedding.len() / 2) {
783                let hash = Self::hash_string(interest);
784                let idx = (hash % self.config.user_embedding_dim as u64) as usize;
785                embedding[idx] = 0.5;
786            }
787        } else {
788            // Random initialization
789            let mut rng_instance = rng();
790
791            for val in &mut embedding {
792                // Generate random value between -0.1 and 0.1
793                let random_val = (rng_instance.next_u64() as f32 / u64::MAX as f32) * 0.2 - 0.1;
794                *val = random_val;
795            }
796        }
797
798        // Normalize
799        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
800        if norm > 0.0 {
801            embedding.iter_mut().for_each(|x| *x /= norm);
802        }
803
804        Ok(embedding)
805    }
806
807    /// Create query embedding
808    fn create_query_embedding(&self, query: &str) -> Result<Vector> {
809        // Simple token-based embedding (in production, use proper model)
810        let tokens: Vec<String> = query
811            .to_lowercase()
812            .split_whitespace()
813            .map(String::from)
814            .collect();
815
816        let mut embedding = vec![0.0f32; 128]; // Default dimension
817
818        for token in tokens {
819            let hash = Self::hash_string(&token);
820            let idx = (hash % embedding.len() as u64) as usize;
821            embedding[idx] += 1.0;
822        }
823
824        // Normalize
825        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
826        if norm > 0.0 {
827            embedding.iter_mut().for_each(|x| *x /= norm);
828        }
829
830        Ok(Vector::new(embedding))
831    }
832
833    /// Compute cosine similarity between two vectors
834    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
835        if a.len() != b.len() {
836            return 0.0;
837        }
838
839        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
840        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
841        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
842
843        if norm_a == 0.0 || norm_b == 0.0 {
844            return 0.0;
845        }
846
847        dot_product / (norm_a * norm_b)
848    }
849
850    /// Convert feedback type to interaction type
851    fn feedback_to_interaction_type(feedback_type: &FeedbackType) -> InteractionType {
852        match feedback_type {
853            FeedbackType::Explicit(rating) => InteractionType::Rating(*rating),
854            FeedbackType::Click => InteractionType::Click,
855            FeedbackType::View => InteractionType::View,
856            FeedbackType::Skip => InteractionType::Custom("skip".to_string()),
857            FeedbackType::Purchase => InteractionType::Purchase,
858            FeedbackType::Share => InteractionType::Share,
859            FeedbackType::LongDwell => InteractionType::DwellTime(Duration::from_secs(60)),
860            FeedbackType::QuickBounce => InteractionType::DwellTime(Duration::from_secs(5)),
861            FeedbackType::Custom(name) => InteractionType::Custom(name.clone()),
862        }
863    }
864
/// Hash a string to a `u64` using the standard library's `DefaultHasher`.
///
/// A fresh hasher in its default state is used for every call, so the same
/// input gives the same output on every call within one program run.
fn hash_string(s: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{BuildHasher, BuildHasherDefault};

    // `hash_one` builds a default hasher, feeds it `s`, and finishes it.
    BuildHasherDefault::<DefaultHasher>::default().hash_one(s)
}
874
875    /// Get user profile
876    pub fn get_user_profile(&self, user_id: &str) -> Option<UserProfile> {
877        self.user_profiles.read().get(user_id).cloned()
878    }
879
880    /// Get statistics
881    pub fn get_statistics(&self) -> PersonalizationStatistics {
882        let user_profiles = self.user_profiles.read();
883        let item_profiles = self.item_profiles.read();
884        let interactions = self.interaction_history.read();
885
886        PersonalizationStatistics {
887            total_users: user_profiles.len(),
888            total_items: item_profiles.len(),
889            total_interactions: interactions.len(),
890            average_interactions_per_user: if user_profiles.is_empty() {
891                0.0
892            } else {
893                interactions.len() as f32 / user_profiles.len() as f32
894            },
895        }
896    }
897}
898
899/// Statistics about personalization
900#[derive(Debug, Clone, Serialize, Deserialize)]
901pub struct PersonalizationStatistics {
902    pub total_users: usize,
903    pub total_items: usize,
904    pub total_interactions: usize,
905    pub average_interactions_per_user: f32,
906}
907
#[cfg(test)]
mod tests {
    use super::*;

    /// A registered user can be fetched back by id.
    #[test]
    fn test_register_user() -> Result<()> {
        let mut engine = PersonalizedSearchEngine::new_default()?;
        engine.register_user("user1", None)?;

        assert!(engine.get_user_profile("user1").is_some());
        Ok(())
    }

    /// Recording one feedback event shows up as one interaction in the statistics.
    #[test]
    fn test_feedback_recording() -> Result<()> {
        let mut engine = PersonalizedSearchEngine::new_default()?;
        engine.register_user("user1", None)?;

        engine.record_feedback(UserFeedback {
            user_id: "user1".to_string(),
            item_id: "item1".to_string(),
            feedback_type: FeedbackType::Click,
            score: 1.0,
            timestamp: SystemTime::now(),
            metadata: HashMap::new(),
        })?;

        assert_eq!(engine.get_statistics().total_interactions, 1);
        Ok(())
    }

    /// Despite its name, this only checks that a query embedding has 128 dimensions.
    #[test]
    fn test_cold_start_strategy() -> Result<()> {
        let engine = PersonalizedSearchEngine::new_default()?;

        assert_eq!(engine.create_query_embedding("test query")?.dimensions, 128);
        Ok(())
    }

    /// Identical vectors have a cosine similarity of (approximately) one.
    #[test]
    fn test_cosine_similarity() -> Result<()> {
        let engine = PersonalizedSearchEngine::new_default()?;
        let unit_x = [1.0_f32, 0.0, 0.0];

        let similarity = engine.cosine_similarity(&unit_x, &unit_x);

        assert!((similarity - 1.0).abs() < 0.001);
        Ok(())
    }
}