Skip to main content

oxirs_vec/
personalized_search.rs

1//! Personalized vector search with user-specific embeddings and preferences
2//!
3//! This module provides personalized search capabilities that adapt to individual
4//! user behavior, preferences, and interaction history. It maintains user-specific
5//! embeddings that evolve over time based on feedback signals.
6//!
7//! # Features
8//!
9//! - **User embeddings**: Learn and maintain personalized user representations
10//! - **Collaborative filtering**: Leverage behavior of similar users
11//! - **Contextual bandits**: Balance exploration vs exploitation
12//! - **Preference learning**: Adapt to explicit and implicit feedback
13//! - **Privacy-aware**: Support for federated and differential privacy
14//! - **Real-time adaptation**: Update user models with each interaction
15//!
16//! # Example
17//!
18//! ```rust,no_run
19//! use oxirs_vec::personalized_search::{PersonalizedSearchEngine, UserFeedback, FeedbackType};
20//!
21//! // Create personalized search engine
22//! let mut engine = PersonalizedSearchEngine::new_default()?;
23//!
24//! // Register user
25//! engine.register_user("user123", None)?;
26//!
27//! // Search with personalization
28//! let results = engine.personalized_search("user123", "machine learning", 10)?;
29//!
30//! // Provide feedback
31//! engine.record_feedback(UserFeedback {
32//!     user_id: "user123".to_string(),
33//!     item_id: results[0].id.clone(),
34//!     feedback_type: FeedbackType::Click,
35//!     score: 1.0,
36//!     timestamp: std::time::SystemTime::now(),
37//!     metadata: Default::default(),
38//! })?;
39//! # Ok::<(), anyhow::Error>(())
40//! ```
41
42use crate::Vector;
43use crate::VectorStore;
44use anyhow::{anyhow, Result};
45use parking_lot::RwLock;
46use scirs2_core::random::RngExt;
47use serde::{Deserialize, Serialize};
48use std::collections::HashMap;
49use std::sync::Arc;
50use std::time::{Duration, SystemTime};
51
/// Shared, lock-protected cache of pairwise user similarity scores.
///
/// The map is keyed by a pair of strings (presumably two user IDs — confirm
/// against the code that fills it) and stores an `f32` score. The outer
/// `Option` lets the cache be absent; the engine constructor starts it as
/// `None`.
type SimilarityMatrix = Arc<RwLock<Option<HashMap<(String, String), f32>>>>;
54
/// Personalized search engine that maintains user-specific models.
///
/// All mutable state lives behind `Arc<RwLock<..>>` so that `&self` methods
/// (such as searching) can read it while `&mut self` methods update it.
pub struct PersonalizedSearchEngine {
    /// Tunable weights, thresholds and feature switches.
    config: PersonalizationConfig,
    /// Backing store queried for the content-based candidate list.
    vector_store: Arc<RwLock<VectorStore>>,
    /// Registered users, keyed by user ID.
    user_profiles: Arc<RwLock<HashMap<String, UserProfile>>>,
    /// Known items, keyed by item ID.
    item_profiles: Arc<RwLock<HashMap<String, ItemProfile>>>,
    /// Append-only log of every interaction recorded through feedback.
    interaction_history: Arc<RwLock<Vec<UserInteraction>>>,
    /// Optional cache of pairwise similarities; starts out as `None`.
    similarity_matrix: SimilarityMatrix,
}
64
/// Configuration for personalized search.
///
/// Several fields are not referenced in the portion of the file visible here
/// (`time_decay_factor`, `enable_privacy`, `privacy_epsilon`); their exact
/// effect should be confirmed against the rest of the crate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonalizationConfig {
    /// Dimension of user embeddings (length of the vector created when a
    /// user is registered).
    pub user_embedding_dim: usize,
    /// Learning rate for user embedding updates (step size applied to the
    /// per-component gradient when feedback is recorded).
    pub learning_rate: f32,
    /// Decay factor for older interactions.
    pub time_decay_factor: f32,
    /// Weight for collaborative filtering in the combined result score.
    pub collaborative_weight: f32,
    /// Weight for content-based filtering in the combined result score.
    pub content_weight: f32,
    /// Enable contextual bandits (adds an exploration bonus to each result).
    pub enable_bandits: bool,
    /// Exploration rate for bandits (scales the exploration bonus).
    pub exploration_rate: f32,
    /// Enable differential privacy.
    pub enable_privacy: bool,
    /// Privacy epsilon parameter.
    pub privacy_epsilon: f32,
    /// Minimum interactions before personalization; users below this count
    /// are served through the cold start strategy instead.
    pub min_interactions: usize,
    /// User similarity threshold (minimum cosine similarity for another
    /// user to be recorded as a neighbour).
    pub user_similarity_threshold: f32,
    /// Enable real-time updates (update the user profile on every recorded
    /// feedback event).
    pub enable_realtime_updates: bool,
    /// Cold start strategy used while a user has too few interactions.
    pub cold_start_strategy: ColdStartStrategy,
}
95
96impl Default for PersonalizationConfig {
97    fn default() -> Self {
98        Self {
99            user_embedding_dim: 128,
100            learning_rate: 0.01,
101            time_decay_factor: 0.95,
102            collaborative_weight: 0.4,
103            content_weight: 0.6,
104            enable_bandits: true,
105            exploration_rate: 0.1,
106            enable_privacy: false,
107            privacy_epsilon: 1.0,
108            min_interactions: 5,
109            user_similarity_threshold: 0.7,
110            enable_realtime_updates: true,
111            cold_start_strategy: ColdStartStrategy::PopularityBased,
112        }
113    }
114}
115
/// Strategy for handling new users (cold start problem).
///
/// Applied to the content-based candidate list while a user has fewer than
/// `min_interactions` recorded interactions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ColdStartStrategy {
    /// Use global popularity: each candidate with a known item profile is
    /// boosted by a fraction of its popularity score.
    PopularityBased,
    /// Use demographic information. In the code visible here this branch
    /// only re-sorts candidates by their existing score.
    DemographicBased,
    /// Use random exploration: each candidate receives a small random bonus.
    RandomExploration,
    /// Use hybrid approach: popularity boost plus a smaller random bonus,
    /// applied to candidates that have an item profile.
    Hybrid,
}
128
/// User profile containing personalized embedding and preferences.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserProfile {
    /// Identifier the profile is stored under.
    pub user_id: String,
    /// Learned user vector; re-normalized to unit length after each update
    /// whenever its norm is non-zero.
    pub embedding: Vec<f32>,
    /// Free-form preference weights; created empty at registration.
    pub preferences: HashMap<String, f32>,
    /// Number of interactions applied to this profile.
    pub interaction_count: usize,
    /// Time of the most recent profile update.
    pub last_updated: SystemTime,
    /// Optional demographics supplied at registration.
    pub demographics: Option<UserDemographics>,
    pub similar_users: Vec<(String, f32)>, // (user_id, similarity)
    /// Per-category affinity, smoothed as `0.9 * old + 0.1 * score`.
    pub favorite_categories: HashMap<String, f32>,
    pub negative_items: Vec<String>, // Disliked items
}
142
/// User demographic information.
///
/// Every scalar field is optional; only `interests` is read by the code
/// visible in this file (to seed a new user's embedding).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserDemographics {
    /// Age bracket label, if known.
    pub age_group: Option<String>,
    /// Location label, if known.
    pub location: Option<String>,
    /// Preferred language, if known.
    pub language: Option<String>,
    /// Interest keywords; each one is hashed to an embedding slot at
    /// registration.
    pub interests: Vec<String>,
}
151
/// Item profile with popularity and category information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ItemProfile {
    /// Identifier the profile is stored under.
    pub item_id: String,
    /// Item vector compared against user embeddings via cosine similarity.
    pub embedding: Vec<f32>,
    /// Decayed popularity, updated as `0.95 * old + 0.05 * score` per
    /// interaction.
    pub popularity_score: f32,
    /// Category labels used for the user's category-preference boost.
    pub categories: Vec<String>,
    /// Number of interactions recorded against this item.
    pub interaction_count: usize,
    /// Running mean of interaction scores.
    pub average_rating: f32,
    /// Time of the most recent interaction with this item.
    pub last_accessed: SystemTime,
}
163
/// User interaction record.
///
/// One entry is appended to the engine's interaction history for every
/// piece of feedback that is recorded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserInteraction {
    /// User who performed the interaction.
    pub user_id: String,
    /// Item the interaction refers to.
    pub item_id: String,
    /// Kind of interaction, derived from the feedback type.
    pub interaction_type: InteractionType,
    /// Signal strength; positive values count as positive interactions and
    /// negative values mark the item as disliked.
    pub score: f32,
    /// Time carried over from the originating feedback.
    pub timestamp: SystemTime,
    /// Free-form key/value context copied from the feedback metadata.
    pub context: HashMap<String, String>,
}
174
/// Type of user interaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InteractionType {
    /// The item was viewed.
    View,
    /// The item was clicked.
    Click,
    /// The item was liked.
    Like,
    /// The item was disliked.
    Dislike,
    /// The item was shared.
    Share,
    /// The item was purchased.
    Purchase,
    /// An explicit rating value.
    Rating(f32),
    /// Time spent on the item.
    DwellTime(Duration),
    /// Any other interaction, identified by name (for example `"skip"`).
    Custom(String),
}
188
/// User feedback for model updates.
///
/// This is the input of `record_feedback`; it is converted into a
/// [`UserInteraction`] before being stored.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserFeedback {
    /// User providing the feedback.
    pub user_id: String,
    /// Item the feedback is about.
    pub item_id: String,
    /// Kind of signal being reported.
    pub feedback_type: FeedbackType,
    /// Signal strength, copied verbatim into the stored interaction.
    pub score: f32,
    /// When the feedback occurred.
    pub timestamp: SystemTime,
    /// Free-form key/value metadata, stored as the interaction context.
    pub metadata: HashMap<String, String>,
}
199
/// Type of feedback signal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FeedbackType {
    /// Explicit rating supplied by the user.
    Explicit(f32), // Rating
    /// Binary positive signal.
    Click,         // Binary positive signal
    /// Implicit interest.
    View,          // Implicit interest
    /// Negative signal; stored as a custom `"skip"` interaction.
    Skip,          // Negative signal
    /// Strong positive signal.
    Purchase,      // Strong positive signal
    /// Strong positive signal.
    Share,         // Strong positive signal
    /// Time-based positive signal; stored as a 60-second dwell time.
    LongDwell,     // Time-based positive
    /// Time-based negative signal; stored as a 5-second dwell time.
    QuickBounce,   // Time-based negative
    /// Any other signal, identified by name.
    Custom(String),
}
213
/// Personalized search result.
///
/// The component scores are kept alongside the final `score` so callers can
/// see how a ranking was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonalizedResult {
    /// Identifier of the matched item.
    pub id: String,
    /// Final ranking score after all adjustments.
    pub score: f32,
    /// Contribution from the user-embedding / category match.
    pub personalization_score: f32,
    /// Raw score returned by the content-based search.
    pub content_score: f32,
    /// Contribution from similar users and item popularity.
    pub collaborative_score: f32,
    /// Bonus added to encourage exploration of rarely seen items.
    pub exploration_bonus: f32,
    /// Free-form key/value metadata; the `"category"` key is consulted when
    /// diversifying results.
    pub metadata: HashMap<String, String>,
    /// Human-readable reason for the recommendation, when one was generated.
    pub explanation: Option<String>,
}
226
227impl PersonalizedSearchEngine {
228    /// Create a new personalized search engine with default configuration
229    pub fn new_default() -> Result<Self> {
230        Self::new(PersonalizationConfig::default(), None)
231    }
232
233    /// Create a new personalized search engine with custom configuration
234    pub fn new(config: PersonalizationConfig, vector_store: Option<VectorStore>) -> Result<Self> {
235        let default_store = VectorStore::new();
236        let vector_store = Arc::new(RwLock::new(vector_store.unwrap_or(default_store)));
237
238        Ok(Self {
239            config,
240            vector_store,
241            user_profiles: Arc::new(RwLock::new(HashMap::new())),
242            item_profiles: Arc::new(RwLock::new(HashMap::new())),
243            interaction_history: Arc::new(RwLock::new(Vec::new())),
244            similarity_matrix: Arc::new(RwLock::new(None)),
245        })
246    }
247
248    /// Register a new user
249    pub fn register_user(
250        &mut self,
251        user_id: impl Into<String>,
252        demographics: Option<UserDemographics>,
253    ) -> Result<()> {
254        let user_id = user_id.into();
255
256        // Initialize user embedding
257        let embedding = self.initialize_user_embedding(&user_id, demographics.as_ref())?;
258
259        let profile = UserProfile {
260            user_id: user_id.clone(),
261            embedding,
262            preferences: HashMap::new(),
263            interaction_count: 0,
264            last_updated: SystemTime::now(),
265            demographics,
266            similar_users: Vec::new(),
267            favorite_categories: HashMap::new(),
268            negative_items: Vec::new(),
269        };
270
271        self.user_profiles.write().insert(user_id, profile);
272
273        Ok(())
274    }
275
276    /// Perform personalized search for a user
277    pub fn personalized_search(
278        &self,
279        user_id: impl Into<String>,
280        query: impl Into<String>,
281        k: usize,
282    ) -> Result<Vec<PersonalizedResult>> {
283        let user_id = user_id.into();
284        let query = query.into();
285
286        // Get user profile
287        let user_profiles = self.user_profiles.read();
288        let user_profile = user_profiles
289            .get(&user_id)
290            .ok_or_else(|| anyhow!("User not found: {}", user_id))?;
291
292        // Check if user has enough interactions for personalization
293        let use_personalization = user_profile.interaction_count >= self.config.min_interactions;
294
295        // Get base search results (content-based)
296        let base_results = self.content_based_search(&query, k * 3)?;
297
298        // Apply personalization
299        let personalized_results = if use_personalization {
300            self.apply_personalization(&user_id, base_results, k)?
301        } else {
302            self.apply_cold_start_strategy(&user_id, base_results, k)?
303        };
304
305        Ok(personalized_results)
306    }
307
308    /// Content-based search without personalization
309    fn content_based_search(&self, query: &str, k: usize) -> Result<Vec<PersonalizedResult>> {
310        // Simple text embedding (in production, use proper embedding model)
311        let _query_embedding = self.create_query_embedding(query)?;
312
313        // Search in vector store using text query
314        let store = self.vector_store.read();
315        let results = store.similarity_search(query, k)?;
316
317        // Convert to PersonalizedResult
318        Ok(results
319            .into_iter()
320            .map(|(id, score)| PersonalizedResult {
321                id,
322                score,
323                personalization_score: 0.0,
324                content_score: score,
325                collaborative_score: 0.0,
326                exploration_bonus: 0.0,
327                metadata: HashMap::new(),
328                explanation: None,
329            })
330            .collect())
331    }
332
333    /// Apply personalization to search results
334    fn apply_personalization(
335        &self,
336        user_id: &str,
337        mut results: Vec<PersonalizedResult>,
338        k: usize,
339    ) -> Result<Vec<PersonalizedResult>> {
340        let user_profiles = self.user_profiles.read();
341        let user_profile = user_profiles
342            .get(user_id)
343            .ok_or_else(|| anyhow!("User not found"))?;
344
345        // Compute collaborative filtering scores
346        for result in &mut results {
347            // Collaborative score based on similar users
348            let collab_score = self.compute_collaborative_score(user_profile, &result.id)?;
349
350            // Personalization score based on user embedding
351            let personal_score = self.compute_personalization_score(user_profile, &result.id)?;
352
353            // Exploration bonus (contextual bandits)
354            let exploration_bonus = if self.config.enable_bandits {
355                self.compute_exploration_bonus(user_profile, &result.id)?
356            } else {
357                0.0
358            };
359
360            // Combine scores
361            result.collaborative_score = collab_score;
362            result.personalization_score = personal_score;
363            result.exploration_bonus = exploration_bonus;
364
365            result.score = self.config.content_weight * result.content_score
366                + self.config.collaborative_weight * collab_score
367                + (1.0 - self.config.content_weight - self.config.collaborative_weight)
368                    * personal_score
369                + exploration_bonus;
370
371            // Generate explanation
372            result.explanation = Some(self.generate_explanation(result));
373        }
374
375        // Re-rank by combined score
376        results.sort_by(|a, b| {
377            b.score
378                .partial_cmp(&a.score)
379                .unwrap_or(std::cmp::Ordering::Equal)
380        });
381
382        // Apply diversity
383        let diversified = self.apply_diversity(&results, k)?;
384
385        Ok(diversified)
386    }
387
388    /// Compute collaborative filtering score
389    fn compute_collaborative_score(
390        &self,
391        user_profile: &UserProfile,
392        item_id: &str,
393    ) -> Result<f32> {
394        let item_profiles = self.item_profiles.read();
395
396        if let Some(item_profile) = item_profiles.get(item_id) {
397            // Score based on similar users' interactions
398            let mut collab_score = 0.0;
399            let mut total_weight = 0.0;
400
401            for (similar_user_id, similarity) in &user_profile.similar_users {
402                // Check if similar user interacted with this item
403                let interactions = self.interaction_history.read();
404                let user_interacted = interactions.iter().any(|i| {
405                    &i.user_id == similar_user_id && i.item_id == item_id && i.score > 0.0
406                });
407
408                if user_interacted {
409                    collab_score += similarity;
410                    total_weight += similarity;
411                }
412            }
413
414            if total_weight > 0.0 {
415                collab_score /= total_weight;
416            }
417
418            // Add popularity bonus
419            collab_score += item_profile.popularity_score * 0.1;
420
421            Ok(collab_score.min(1.0))
422        } else {
423            Ok(0.0)
424        }
425    }
426
427    /// Compute personalization score based on user embedding
428    fn compute_personalization_score(
429        &self,
430        user_profile: &UserProfile,
431        item_id: &str,
432    ) -> Result<f32> {
433        let item_profiles = self.item_profiles.read();
434
435        if let Some(item_profile) = item_profiles.get(item_id) {
436            // Compute cosine similarity between user and item embeddings
437            let similarity =
438                self.cosine_similarity(&user_profile.embedding, &item_profile.embedding);
439
440            // Check negative items
441            if user_profile.negative_items.contains(&item_id.to_string()) {
442                return Ok(similarity * 0.5); // Penalize disliked items
443            }
444
445            // Boost based on category preferences
446            let category_boost = item_profile
447                .categories
448                .iter()
449                .filter_map(|cat| user_profile.favorite_categories.get(cat))
450                .sum::<f32>()
451                / item_profile.categories.len().max(1) as f32;
452
453            Ok((similarity + category_boost * 0.3).min(1.0))
454        } else {
455            Ok(0.0)
456        }
457    }
458
459    /// Compute exploration bonus using contextual bandits
460    fn compute_exploration_bonus(&self, user_profile: &UserProfile, item_id: &str) -> Result<f32> {
461        let item_profiles = self.item_profiles.read();
462
463        if let Some(item_profile) = item_profiles.get(item_id) {
464            // UCB-style exploration bonus
465            let n = user_profile.interaction_count as f32;
466            let n_i = item_profile.interaction_count as f32;
467
468            if n_i == 0.0 {
469                // High exploration bonus for unseen items
470                return Ok(self.config.exploration_rate);
471            }
472
473            let exploration_bonus = self.config.exploration_rate * ((2.0 * n.ln() / n_i).sqrt());
474
475            Ok(exploration_bonus.min(0.5))
476        } else {
477            Ok(0.0)
478        }
479    }
480
481    /// Apply cold start strategy for new users
482    fn apply_cold_start_strategy(
483        &self,
484        _user_id: &str,
485        mut results: Vec<PersonalizedResult>,
486        k: usize,
487    ) -> Result<Vec<PersonalizedResult>> {
488        match self.config.cold_start_strategy {
489            ColdStartStrategy::PopularityBased => {
490                // Boost popular items
491                let item_profiles = self.item_profiles.read();
492
493                for result in &mut results {
494                    if let Some(item_profile) = item_profiles.get(&result.id) {
495                        result.score += item_profile.popularity_score * 0.3;
496                    }
497                }
498
499                results.sort_by(|a, b| {
500                    b.score
501                        .partial_cmp(&a.score)
502                        .unwrap_or(std::cmp::Ordering::Equal)
503                });
504            }
505            ColdStartStrategy::RandomExploration => {
506                // Add random exploration
507                use scirs2_core::random::rng;
508                let mut rng_instance = rng();
509
510                for result in &mut results {
511                    // Generate random value between 0.0 and 0.2
512                    let random_val = (rng_instance.random::<u64>() as f32 / u64::MAX as f32) * 0.2;
513                    result.score += random_val;
514                }
515
516                results.sort_by(|a, b| {
517                    b.score
518                        .partial_cmp(&a.score)
519                        .unwrap_or(std::cmp::Ordering::Equal)
520                });
521            }
522            ColdStartStrategy::DemographicBased => {
523                // Use demographic-based recommendations (simplified)
524                results.sort_by(|a, b| {
525                    b.score
526                        .partial_cmp(&a.score)
527                        .unwrap_or(std::cmp::Ordering::Equal)
528                });
529            }
530            ColdStartStrategy::Hybrid => {
531                // Combine multiple strategies
532                use scirs2_core::random::rng;
533                let item_profiles = self.item_profiles.read();
534                let mut rng_instance = rng();
535
536                for result in &mut results {
537                    if let Some(item_profile) = item_profiles.get(&result.id) {
538                        let random_val =
539                            (rng_instance.random::<u64>() as f32 / u64::MAX as f32) * 0.1;
540                        result.score += item_profile.popularity_score * 0.2 + random_val;
541                    }
542                }
543
544                results.sort_by(|a, b| {
545                    b.score
546                        .partial_cmp(&a.score)
547                        .unwrap_or(std::cmp::Ordering::Equal)
548                });
549            }
550        }
551
552        Ok(results.into_iter().take(k).collect())
553    }
554
555    /// Record user feedback and update user profile
556    pub fn record_feedback(&mut self, feedback: UserFeedback) -> Result<()> {
557        // Convert feedback to interaction
558        let interaction = UserInteraction {
559            user_id: feedback.user_id.clone(),
560            item_id: feedback.item_id.clone(),
561            interaction_type: Self::feedback_to_interaction_type(&feedback.feedback_type),
562            score: feedback.score,
563            timestamp: feedback.timestamp,
564            context: feedback.metadata.clone(),
565        };
566
567        // Store interaction
568        self.interaction_history.write().push(interaction.clone());
569
570        // Update user profile if real-time updates enabled
571        if self.config.enable_realtime_updates {
572            self.update_user_profile(&feedback.user_id, &interaction)?;
573        }
574
575        // Update item profile
576        self.update_item_profile(&feedback.item_id, &interaction)?;
577
578        Ok(())
579    }
580
581    /// Update user profile based on interaction
582    fn update_user_profile(&mut self, user_id: &str, interaction: &UserInteraction) -> Result<()> {
583        let mut user_profiles = self.user_profiles.write();
584
585        if let Some(profile) = user_profiles.get_mut(user_id) {
586            // Update interaction count
587            profile.interaction_count += 1;
588            profile.last_updated = SystemTime::now();
589
590            // Get item embedding
591            let item_profiles = self.item_profiles.read();
592            if let Some(item_profile) = item_profiles.get(&interaction.item_id) {
593                // Update user embedding using gradient descent
594                let learning_rate = self.config.learning_rate;
595
596                for (i, emb_val) in profile.embedding.iter_mut().enumerate() {
597                    if i < item_profile.embedding.len() {
598                        let target = item_profile.embedding[i];
599                        let gradient = (target - *emb_val) * interaction.score;
600                        *emb_val += learning_rate * gradient;
601                    }
602                }
603
604                // Normalize embedding
605                let norm: f32 = profile.embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
606                if norm > 0.0 {
607                    profile.embedding.iter_mut().for_each(|x| *x /= norm);
608                }
609
610                // Update category preferences
611                for category in &item_profile.categories {
612                    let current = profile
613                        .favorite_categories
614                        .get(category)
615                        .copied()
616                        .unwrap_or(0.0);
617                    let updated = current * 0.9 + interaction.score * 0.1;
618                    profile
619                        .favorite_categories
620                        .insert(category.clone(), updated);
621                }
622
623                // Update negative items
624                if interaction.score < 0.0 {
625                    profile.negative_items.push(interaction.item_id.clone());
626                }
627            }
628        }
629
630        Ok(())
631    }
632
633    /// Update item profile based on interaction
634    fn update_item_profile(&mut self, item_id: &str, interaction: &UserInteraction) -> Result<()> {
635        let mut item_profiles = self.item_profiles.write();
636
637        if let Some(profile) = item_profiles.get_mut(item_id) {
638            profile.interaction_count += 1;
639            profile.last_accessed = SystemTime::now();
640
641            // Update average rating
642            let old_avg = profile.average_rating;
643            let count = profile.interaction_count as f32;
644            profile.average_rating = (old_avg * (count - 1.0) + interaction.score) / count;
645
646            // Update popularity score (decayed)
647            profile.popularity_score = profile.popularity_score * 0.95 + interaction.score * 0.05;
648        }
649
650        Ok(())
651    }
652
653    /// Update user similarity matrix
654    pub fn update_user_similarities(&mut self) -> Result<()> {
655        let user_profiles = self.user_profiles.read();
656        let user_ids: Vec<String> = user_profiles.keys().cloned().collect();
657
658        for user_id in &user_ids {
659            if let Some(user_profile) = user_profiles.get(user_id) {
660                let mut similar_users = Vec::new();
661
662                // Compute similarities with all other users
663                for other_id in &user_ids {
664                    if other_id != user_id {
665                        if let Some(other_profile) = user_profiles.get(other_id) {
666                            let similarity = self.cosine_similarity(
667                                &user_profile.embedding,
668                                &other_profile.embedding,
669                            );
670
671                            if similarity >= self.config.user_similarity_threshold {
672                                similar_users.push((other_id.clone(), similarity));
673                            }
674                        }
675                    }
676                }
677
678                // Sort by similarity and keep top 10
679                similar_users
680                    .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
681                similar_users.truncate(10);
682
683                // Update user profile (need to drop read lock and acquire write lock)
684                drop(user_profiles);
685                let mut user_profiles = self.user_profiles.write();
686                if let Some(profile) = user_profiles.get_mut(user_id) {
687                    profile.similar_users = similar_users;
688                }
689
690                return Ok(()); // Early return to avoid deadlock
691            }
692        }
693
694        Ok(())
695    }
696
697    /// Apply diversity to results
698    fn apply_diversity(
699        &self,
700        results: &[PersonalizedResult],
701        k: usize,
702    ) -> Result<Vec<PersonalizedResult>> {
703        // MMR-style diversity
704        let mut diversified = Vec::new();
705        let mut remaining: Vec<PersonalizedResult> = results.to_vec();
706
707        if !remaining.is_empty() {
708            // Add highest scored item first
709            diversified.push(remaining.remove(0));
710        }
711
712        let lambda = 0.7; // Relevance vs diversity trade-off
713
714        while diversified.len() < k && !remaining.is_empty() {
715            let mut best_idx = 0;
716            let mut best_score = f32::NEG_INFINITY;
717
718            for (i, candidate) in remaining.iter().enumerate() {
719                // Compute minimum similarity to already selected items
720                let mut min_similarity = 1.0f32;
721
722                for selected in &diversified {
723                    let similarity = if selected.metadata.get("category")
724                        == candidate.metadata.get("category")
725                    {
726                        0.8
727                    } else {
728                        0.2
729                    };
730
731                    min_similarity = min_similarity.min(similarity);
732                }
733
734                // MMR score
735                let mmr_score = lambda * candidate.score + (1.0 - lambda) * (1.0 - min_similarity);
736
737                if mmr_score > best_score {
738                    best_score = mmr_score;
739                    best_idx = i;
740                }
741            }
742
743            diversified.push(remaining.remove(best_idx));
744        }
745
746        Ok(diversified)
747    }
748
749    /// Generate explanation for personalized result
750    fn generate_explanation(&self, result: &PersonalizedResult) -> String {
751        let mut reasons = Vec::new();
752
753        if result.personalization_score > 0.5 {
754            reasons.push("matches your interests");
755        }
756
757        if result.collaborative_score > 0.5 {
758            reasons.push("liked by similar users");
759        }
760
761        if result.exploration_bonus > 0.1 {
762            reasons.push("new discovery");
763        }
764
765        if reasons.is_empty() {
766            reasons.push("relevant to your query");
767        }
768
769        format!("Recommended because: {}", reasons.join(", "))
770    }
771
772    /// Initialize user embedding
773    fn initialize_user_embedding(
774        &self,
775        _user_id: &str,
776        demographics: Option<&UserDemographics>,
777    ) -> Result<Vec<f32>> {
778        use scirs2_core::random::rng;
779        let mut embedding = vec![0.0f32; self.config.user_embedding_dim];
780
781        if let Some(demo) = demographics {
782            // Use demographics to seed embedding
783            for (_i, interest) in demo.interests.iter().enumerate().take(embedding.len() / 2) {
784                let hash = Self::hash_string(interest);
785                let idx = (hash % self.config.user_embedding_dim as u64) as usize;
786                embedding[idx] = 0.5;
787            }
788        } else {
789            // Random initialization
790            let mut rng_instance = rng();
791
792            for val in &mut embedding {
793                // Generate random value between -0.1 and 0.1
794                let random_val =
795                    (rng_instance.random::<u64>() as f32 / u64::MAX as f32) * 0.2 - 0.1;
796                *val = random_val;
797            }
798        }
799
800        // Normalize
801        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
802        if norm > 0.0 {
803            embedding.iter_mut().for_each(|x| *x /= norm);
804        }
805
806        Ok(embedding)
807    }
808
809    /// Create query embedding
810    fn create_query_embedding(&self, query: &str) -> Result<Vector> {
811        // Simple token-based embedding (in production, use proper model)
812        let tokens: Vec<String> = query
813            .to_lowercase()
814            .split_whitespace()
815            .map(String::from)
816            .collect();
817
818        let mut embedding = vec![0.0f32; 128]; // Default dimension
819
820        for token in tokens {
821            let hash = Self::hash_string(&token);
822            let idx = (hash % embedding.len() as u64) as usize;
823            embedding[idx] += 1.0;
824        }
825
826        // Normalize
827        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
828        if norm > 0.0 {
829            embedding.iter_mut().for_each(|x| *x /= norm);
830        }
831
832        Ok(Vector::new(embedding))
833    }
834
835    /// Compute cosine similarity between two vectors
836    fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
837        if a.len() != b.len() {
838            return 0.0;
839        }
840
841        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
842        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
843        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
844
845        if norm_a == 0.0 || norm_b == 0.0 {
846            return 0.0;
847        }
848
849        dot_product / (norm_a * norm_b)
850    }
851
852    /// Convert feedback type to interaction type
853    fn feedback_to_interaction_type(feedback_type: &FeedbackType) -> InteractionType {
854        match feedback_type {
855            FeedbackType::Explicit(rating) => InteractionType::Rating(*rating),
856            FeedbackType::Click => InteractionType::Click,
857            FeedbackType::View => InteractionType::View,
858            FeedbackType::Skip => InteractionType::Custom("skip".to_string()),
859            FeedbackType::Purchase => InteractionType::Purchase,
860            FeedbackType::Share => InteractionType::Share,
861            FeedbackType::LongDwell => InteractionType::DwellTime(Duration::from_secs(60)),
862            FeedbackType::QuickBounce => InteractionType::DwellTime(Duration::from_secs(5)),
863            FeedbackType::Custom(name) => InteractionType::Custom(name.clone()),
864        }
865    }
866
/// Hash `s` to a `u64` using the standard library's `DefaultHasher`.
///
/// The value is deterministic for a given build, but the standard library
/// does not guarantee the algorithm across Rust releases.
fn hash_string(s: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{BuildHasher, BuildHasherDefault};

    // `hash_one` builds a fresh default hasher, feeds it `s`, and finishes it.
    BuildHasherDefault::<DefaultHasher>::default().hash_one(s)
}
876
877    /// Get user profile
878    pub fn get_user_profile(&self, user_id: &str) -> Option<UserProfile> {
879        self.user_profiles.read().get(user_id).cloned()
880    }
881
882    /// Get statistics
883    pub fn get_statistics(&self) -> PersonalizationStatistics {
884        let user_profiles = self.user_profiles.read();
885        let item_profiles = self.item_profiles.read();
886        let interactions = self.interaction_history.read();
887
888        PersonalizationStatistics {
889            total_users: user_profiles.len(),
890            total_items: item_profiles.len(),
891            total_interactions: interactions.len(),
892            average_interactions_per_user: if user_profiles.is_empty() {
893                0.0
894            } else {
895                interactions.len() as f32 / user_profiles.len() as f32
896            },
897        }
898    }
899}
900
901/// Statistics about personalization
902#[derive(Debug, Clone, Serialize, Deserialize)]
903pub struct PersonalizationStatistics {
904    pub total_users: usize,
905    pub total_items: usize,
906    pub total_interactions: usize,
907    pub average_interactions_per_user: f32,
908}
909
#[cfg(test)]
mod tests {
    use super::*;

    /// A registered user can be looked up afterwards.
    #[test]
    fn test_register_user() -> Result<()> {
        let mut engine = PersonalizedSearchEngine::new_default()?;

        engine.register_user("user1", None)?;

        let profile = engine.get_user_profile("user1");
        assert!(profile.is_some());

        Ok(())
    }

    /// Recording one piece of feedback shows up as one interaction.
    #[test]
    fn test_feedback_recording() -> Result<()> {
        let mut engine = PersonalizedSearchEngine::new_default()?;

        engine.register_user("user1", None)?;

        let feedback = UserFeedback {
            user_id: "user1".to_string(),
            item_id: "item1".to_string(),
            feedback_type: FeedbackType::Click,
            score: 1.0,
            timestamp: SystemTime::now(),
            metadata: HashMap::new(),
        };

        engine.record_feedback(feedback)?;

        let stats = engine.get_statistics();
        assert_eq!(stats.total_interactions, 1);

        Ok(())
    }

    /// Query embeddings are produced in the default 128-dimensional space.
    ///
    /// (Previously named `test_cold_start_strategy`, although it never
    /// exercised cold-start handling.)
    #[test]
    fn test_query_embedding_dimension() -> Result<()> {
        let engine = PersonalizedSearchEngine::new_default()?;

        let query_embedding = engine.create_query_embedding("test query")?;
        assert_eq!(query_embedding.dimensions, 128);

        Ok(())
    }

    /// Identical vectors have similarity 1.
    #[test]
    fn test_cosine_similarity() -> Result<()> {
        let engine = PersonalizedSearchEngine::new_default()?;

        let a = vec![1.0, 0.0, 0.0];
        let b = vec![1.0, 0.0, 0.0];

        let similarity = engine.cosine_similarity(&a, &b);
        assert!((similarity - 1.0).abs() < 0.001);

        Ok(())
    }

    /// Orthogonal, opposite, mismatched-length and zero vectors.
    #[test]
    fn test_cosine_similarity_edge_cases() -> Result<()> {
        let engine = PersonalizedSearchEngine::new_default()?;

        let orthogonal = engine.cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]);
        assert!(orthogonal.abs() < 0.001);

        let opposite = engine.cosine_similarity(&[1.0, 2.0], &[-1.0, -2.0]);
        assert!((opposite + 1.0).abs() < 0.001);

        // Both early-return paths yield exactly 0.0.
        assert_eq!(engine.cosine_similarity(&[1.0, 0.0], &[1.0]), 0.0);
        assert_eq!(engine.cosine_similarity(&[0.0, 0.0], &[1.0, 0.0]), 0.0);

        Ok(())
    }

    /// Hashing is repeatable within a process and distinguishes inputs.
    #[test]
    fn test_hash_string_is_deterministic() {
        let first = PersonalizedSearchEngine::hash_string("machine");
        let second = PersonalizedSearchEngine::hash_string("machine");
        assert_eq!(first, second);
        assert_ne!(first, PersonalizedSearchEngine::hash_string("learning"));
    }
}