Skip to main content

shodh_memory/memory/
pattern_detection.rs

//! Pattern-Triggered Replay Detection (PIPE-2)
//!
//! Implements biologically-inspired pattern detection for memory consolidation.
//! Instead of fixed-interval replay, this module detects meaningful patterns
//! that should trigger immediate consolidation.
//!
//! ## Neuroscience Basis
//! Based on hippocampal sharp-wave ripple research (Rasch & Born 2013):
//! - Consolidation is triggered by coherent neural activity patterns
//! - High-value memories and semantically related clusters get priority
//! - Emotional significance (salience spikes) triggers immediate replay
//!
//! ## Pattern Types
//! 1. **Entity Co-occurrence**: Memories sharing multiple named entities
//! 2. **Semantic Clustering**: Dense groups of semantically similar memories
//! 3. **Temporal Clustering**: Memories from the same session/timeframe
//! 4. **Salience Spikes**: High importance/arousal memories
//! 5. **Behavioral Changes**: Topic switches, user corrections
19
20use crate::constants::{
21    BEHAVIORAL_PATTERN_WINDOW_HOURS, ENTITY_COOCCURRENCE_THRESHOLD, ENTITY_PATTERN_CONFIDENCE,
22    HIGH_AROUSAL_THRESHOLD, HIGH_IMPORTANCE_THRESHOLD, MIN_CLUSTER_SIZE, MIN_MEMORIES_PER_PATTERN,
23    MIN_MEMORIES_PER_SESSION, SEMANTIC_CLUSTER_THRESHOLD, SURPRISE_THRESHOLD,
24    TEMPORAL_CLUSTER_WINDOW_SECS,
25};
26use chrono::{DateTime, Duration, Utc};
27use serde::{Deserialize, Serialize};
28use std::collections::{HashMap, HashSet, VecDeque};
29
/// Capacity of the recent-memory buffer used for pattern analysis.
/// Once this many memories are buffered, the oldest one is evicted first.
const MAX_RECENT_MEMORIES: usize = 500;

/// Capacity of the salience-spike buffer.
/// Once this many spikes are buffered, the oldest one is evicted first.
const MAX_SALIENCE_SPIKES: usize = 100;
35
36/// Memory data for pattern analysis
37#[derive(Debug, Clone)]
38pub struct PatternMemory {
39    pub id: String,
40    pub content_preview: String,
41    pub entities: Vec<String>,
42    pub importance: f32,
43    pub arousal: f32,
44    pub created_at: DateTime<Utc>,
45    pub embedding_hash: Option<u64>,
46    pub session_id: Option<String>,
47    pub memory_type: String,
48}
49
50/// Statistics for an entity group pattern
51#[derive(Debug, Clone, Default)]
52pub struct EntityPatternStats {
53    pub memory_ids: Vec<String>,
54    pub first_seen: Option<DateTime<Utc>>,
55    pub last_seen: Option<DateTime<Utc>>,
56    pub total_occurrences: usize,
57    /// Whether this pattern has triggered a replay (consumed)
58    pub triggered: bool,
59}
60
61/// A detected semantic cluster
62#[derive(Debug, Clone)]
63pub struct SemanticCluster {
64    pub memory_ids: Vec<String>,
65    pub centroid_memory_id: String,
66    pub avg_similarity: f32,
67    pub formed_at: DateTime<Utc>,
68    /// Whether this cluster has triggered a replay (consumed)
69    pub triggered: bool,
70}
71
72/// A detected temporal cluster (session)
73#[derive(Debug, Clone)]
74pub struct TemporalCluster {
75    pub memory_ids: Vec<String>,
76    pub session_start: DateTime<Utc>,
77    pub session_end: DateTime<Utc>,
78    pub session_id: Option<String>,
79}
80
81/// A salience spike event
82#[derive(Debug, Clone)]
83pub struct SalienceEvent {
84    pub memory_id: String,
85    pub importance: f32,
86    pub arousal: f32,
87    pub surprise_factor: f32,
88    pub detected_at: DateTime<Utc>,
89    /// Whether this spike has triggered a replay (consumed)
90    pub triggered: bool,
91}
92
93/// Type of behavioral change detected
94#[derive(Debug, Clone, Serialize, Deserialize)]
95#[serde(tag = "type", rename_all = "snake_case")]
96pub enum BehaviorChangeType {
97    /// User switched topics
98    TopicSwitch { from: String, to: String },
99    /// User corrected/negated something
100    UserCorrection { correction_keywords: Vec<String> },
101    /// User switched projects
102    ProjectSwitch {
103        from: Option<String>,
104        to: Option<String>,
105    },
106    /// User is repeatedly querying the same topic
107    QueryRepetition { repeated_topic: String },
108}
109
110/// Behavioral context tracking
111#[derive(Debug, Clone, Default)]
112pub struct BehavioralContext {
113    pub current_topic: Option<String>,
114    pub current_project: Option<String>,
115    pub recent_queries: VecDeque<String>,
116    pub last_correction: Option<DateTime<Utc>>,
117}
118
119/// Trigger types for pattern-based replay
120#[derive(Debug, Clone, Serialize, Deserialize)]
121#[serde(tag = "trigger_type", rename_all = "snake_case")]
122pub enum ReplayTrigger {
123    /// Multiple memories share the same entity group
124    EntityCoOccurrence {
125        entities: Vec<String>,
126        memory_ids: Vec<String>,
127        overlap_score: f32,
128        confidence: f32,
129    },
130
131    /// Dense cluster of semantically similar memories
132    SemanticCluster {
133        memory_ids: Vec<String>,
134        centroid_id: String,
135        avg_similarity: f32,
136        cluster_size: usize,
137    },
138
139    /// Memories created within the same time window
140    TemporalCluster {
141        memory_ids: Vec<String>,
142        window_secs: i64,
143        session_id: Option<String>,
144    },
145
146    /// High importance/arousal memory detected
147    SalienceSpike {
148        memory_id: String,
149        content_preview: String,
150        importance: f32,
151        arousal: f32,
152        surprise_factor: f32,
153    },
154
155    /// User behavior changed (topic switch, correction)
156    BehavioralPatternChange {
157        change_type: BehaviorChangeType,
158        affected_memory_ids: Vec<String>,
159        context: String,
160    },
161
162    /// Fallback: fixed time interval (legacy behavior)
163    TimerInterval,
164}
165
166impl ReplayTrigger {
167    /// Get a human-readable description of the trigger
168    pub fn description(&self) -> String {
169        match self {
170            ReplayTrigger::EntityCoOccurrence { entities, .. } => {
171                format!("Entity co-occurrence: {}", entities.join(", "))
172            }
173            ReplayTrigger::SemanticCluster { cluster_size, .. } => {
174                format!("Semantic cluster of {} memories", cluster_size)
175            }
176            ReplayTrigger::TemporalCluster { memory_ids, .. } => {
177                format!("Temporal cluster: {} memories in session", memory_ids.len())
178            }
179            ReplayTrigger::SalienceSpike {
180                content_preview, ..
181            } => {
182                format!("Salience spike: {}", content_preview)
183            }
184            ReplayTrigger::BehavioralPatternChange { change_type, .. } => match change_type {
185                BehaviorChangeType::TopicSwitch { from, to } => {
186                    format!("Topic switch: {} -> {}", from, to)
187                }
188                BehaviorChangeType::UserCorrection { .. } => "User correction detected".to_string(),
189                BehaviorChangeType::ProjectSwitch { from, to } => {
190                    format!(
191                        "Project switch: {} -> {}",
192                        from.as_deref().unwrap_or("none"),
193                        to.as_deref().unwrap_or("none")
194                    )
195                }
196                BehaviorChangeType::QueryRepetition { repeated_topic } => {
197                    format!("Repeated queries about: {}", repeated_topic)
198                }
199            },
200            ReplayTrigger::TimerInterval => "Fixed interval timer".to_string(),
201        }
202    }
203
204    /// Get the memory IDs affected by this trigger
205    pub fn memory_ids(&self) -> Vec<String> {
206        match self {
207            ReplayTrigger::EntityCoOccurrence { memory_ids, .. } => memory_ids.clone(),
208            ReplayTrigger::SemanticCluster { memory_ids, .. } => memory_ids.clone(),
209            ReplayTrigger::TemporalCluster { memory_ids, .. } => memory_ids.clone(),
210            ReplayTrigger::SalienceSpike { memory_id, .. } => vec![memory_id.clone()],
211            ReplayTrigger::BehavioralPatternChange {
212                affected_memory_ids,
213                ..
214            } => affected_memory_ids.clone(),
215            ReplayTrigger::TimerInterval => vec![],
216        }
217    }
218
219    /// Get trigger type name for metrics/logging
220    pub fn trigger_type_name(&self) -> &'static str {
221        match self {
222            ReplayTrigger::EntityCoOccurrence { .. } => "entity_cooccurrence",
223            ReplayTrigger::SemanticCluster { .. } => "semantic_cluster",
224            ReplayTrigger::TemporalCluster { .. } => "temporal_cluster",
225            ReplayTrigger::SalienceSpike { .. } => "salience_spike",
226            ReplayTrigger::BehavioralPatternChange { .. } => "behavioral_change",
227            ReplayTrigger::TimerInterval => "timer_interval",
228        }
229    }
230}
231
232/// Result of pattern detection cycle
233#[derive(Debug, Clone, Default)]
234pub struct PatternDetectionResult {
235    pub triggers: Vec<ReplayTrigger>,
236    pub entity_patterns_found: usize,
237    pub semantic_clusters_found: usize,
238    pub temporal_clusters_found: usize,
239    pub salience_spikes_found: usize,
240    pub behavioral_changes_found: usize,
241}
242
243/// Pattern detector for triggering memory replay
244///
245/// Monitors incoming memories and detects patterns that should
246/// trigger consolidation rather than waiting for fixed intervals.
247pub struct PatternDetector {
248    /// Recent memories for pattern analysis (bounded)
249    recent_memories: VecDeque<PatternMemory>,
250
251    /// Entity co-occurrence patterns: entity_group_key -> stats
252    entity_patterns: HashMap<String, EntityPatternStats>,
253
254    /// Detected semantic clusters (updated periodically)
255    semantic_clusters: Vec<SemanticCluster>,
256
257    /// Current temporal cluster (session-based)
258    current_temporal_cluster: Option<TemporalCluster>,
259
260    /// Recent salience spikes
261    salience_spikes: VecDeque<SalienceEvent>,
262
263    /// Behavioral context tracking
264    behavioral_context: BehavioralContext,
265
266    /// Running average of recent memory importance (for surprise detection)
267    importance_moving_avg: f32,
268
269    /// Count of memories seen (for moving average)
270    memories_seen: usize,
271
272    /// Last pattern detection timestamp
273    last_detection: DateTime<Utc>,
274}
275
276impl Default for PatternDetector {
277    fn default() -> Self {
278        Self::new()
279    }
280}
281
282impl PatternDetector {
283    pub fn new() -> Self {
284        Self {
285            recent_memories: VecDeque::with_capacity(MAX_RECENT_MEMORIES),
286            entity_patterns: HashMap::new(),
287            semantic_clusters: Vec::new(),
288            current_temporal_cluster: None,
289            salience_spikes: VecDeque::with_capacity(MAX_SALIENCE_SPIKES),
290            behavioral_context: BehavioralContext::default(),
291            importance_moving_avg: 0.5,
292            memories_seen: 0,
293            last_detection: Utc::now() - Duration::hours(1),
294        }
295    }
296
297    /// Register a new memory for pattern tracking
298    ///
299    /// Call this when a memory is created to enable pattern detection.
300    pub fn register_memory(&mut self, memory: PatternMemory) {
301        // Update moving average of importance
302        self.memories_seen += 1;
303        let alpha = 0.1_f32.min(1.0 / self.memories_seen as f32);
304        self.importance_moving_avg =
305            alpha * memory.importance + (1.0 - alpha) * self.importance_moving_avg;
306
307        // Track entity co-occurrence
308        if memory.entities.len() >= 2 {
309            let key = self.entity_group_key(&memory.entities);
310            let stats = self.entity_patterns.entry(key).or_default();
311            stats.memory_ids.push(memory.id.clone());
312            stats.total_occurrences += 1;
313            let now = Utc::now();
314            if stats.first_seen.is_none() {
315                stats.first_seen = Some(now);
316            }
317            stats.last_seen = Some(now);
318        }
319
320        // Update temporal cluster
321        self.update_temporal_cluster(&memory);
322
323        // Add to recent memories (with bounds)
324        if self.recent_memories.len() >= MAX_RECENT_MEMORIES {
325            self.recent_memories.pop_front();
326        }
327        self.recent_memories.push_back(memory);
328    }
329
330    /// Detect all patterns and return triggers
331    ///
332    /// This is the main entry point for pattern detection.
333    /// Call periodically (e.g., every 5 minutes) or after significant events.
334    pub fn detect_patterns(&mut self) -> PatternDetectionResult {
335        let mut result = PatternDetectionResult::default();
336        let now = Utc::now();
337
338        // 1. Check for entity co-occurrence patterns
339        let entity_triggers = self.detect_entity_patterns();
340        result.entity_patterns_found = entity_triggers.len();
341        result.triggers.extend(entity_triggers);
342
343        // 2. Check for temporal clusters
344        if let Some(trigger) = self.detect_temporal_cluster() {
345            result.temporal_clusters_found = 1;
346            result.triggers.push(trigger);
347        }
348
349        // 3. Check for salience spikes (process pending)
350        let salience_triggers = self.process_salience_spikes();
351        result.salience_spikes_found = salience_triggers.len();
352        result.triggers.extend(salience_triggers);
353
354        // Note: Semantic clustering requires embeddings and is done separately
355        // via detect_semantic_clusters() which takes similarity data as input
356
357        self.last_detection = now;
358        result
359    }
360
361    /// Check for salience spike (call when storing a new memory)
362    ///
363    /// Returns Some(trigger) if the memory is surprising enough to warrant
364    /// immediate replay.
365    pub fn check_salience_spike(&mut self, memory: &PatternMemory) -> Option<ReplayTrigger> {
366        // Calculate surprise factor
367        let surprise_factor = (memory.importance - self.importance_moving_avg).abs();
368
369        // Check thresholds
370        let is_spike = memory.importance > HIGH_IMPORTANCE_THRESHOLD
371            || memory.arousal > HIGH_AROUSAL_THRESHOLD
372            || surprise_factor > SURPRISE_THRESHOLD;
373
374        if is_spike {
375            let event = SalienceEvent {
376                memory_id: memory.id.clone(),
377                importance: memory.importance,
378                arousal: memory.arousal,
379                surprise_factor,
380                detected_at: Utc::now(),
381                triggered: false,
382            };
383
384            // Track spike
385            if self.salience_spikes.len() >= MAX_SALIENCE_SPIKES {
386                self.salience_spikes.pop_front();
387            }
388            self.salience_spikes.push_back(event.clone());
389
390            // Return immediate trigger for high-surprise events
391            if surprise_factor > SURPRISE_THRESHOLD * 1.5 || memory.arousal > 0.8 {
392                return Some(ReplayTrigger::SalienceSpike {
393                    memory_id: memory.id.clone(),
394                    content_preview: memory.content_preview.clone(),
395                    importance: memory.importance,
396                    arousal: memory.arousal,
397                    surprise_factor,
398                });
399            }
400        }
401
402        None
403    }
404
405    /// Detect semantic clusters from similarity data
406    ///
407    /// Call this with pre-computed similarity data (from vector search).
408    /// Returns cluster triggers if dense clusters are found.
409    pub fn detect_semantic_clusters(
410        &mut self,
411        similarities: &[(String, String, f32)], // (memory_id_1, memory_id_2, similarity)
412    ) -> Vec<ReplayTrigger> {
413        let mut triggers = Vec::new();
414
415        // Build adjacency list of similar memories
416        let mut adjacency: HashMap<String, Vec<(String, f32)>> = HashMap::new();
417        for (id1, id2, sim) in similarities {
418            if *sim >= SEMANTIC_CLUSTER_THRESHOLD {
419                adjacency
420                    .entry(id1.clone())
421                    .or_default()
422                    .push((id2.clone(), *sim));
423                adjacency
424                    .entry(id2.clone())
425                    .or_default()
426                    .push((id1.clone(), *sim));
427            }
428        }
429
430        // Find connected components (clusters)
431        let mut visited: HashSet<String> = HashSet::new();
432        let mut clusters: Vec<(Vec<String>, f32)> = Vec::new();
433
434        for start_id in adjacency.keys() {
435            if visited.contains(start_id) {
436                continue;
437            }
438
439            // BFS to find cluster
440            let mut cluster = Vec::new();
441            let mut queue = VecDeque::new();
442            let mut total_sim = 0.0_f32;
443            let mut sim_count = 0;
444
445            queue.push_back(start_id.clone());
446            visited.insert(start_id.clone());
447
448            while let Some(current) = queue.pop_front() {
449                cluster.push(current.clone());
450
451                if let Some(neighbors) = adjacency.get(&current) {
452                    for (neighbor, sim) in neighbors {
453                        if !visited.contains(neighbor) {
454                            visited.insert(neighbor.clone());
455                            queue.push_back(neighbor.clone());
456                            total_sim += sim;
457                            sim_count += 1;
458                        }
459                    }
460                }
461            }
462
463            if cluster.len() >= MIN_CLUSTER_SIZE {
464                let avg_sim = if sim_count > 0 {
465                    total_sim / sim_count as f32
466                } else {
467                    SEMANTIC_CLUSTER_THRESHOLD
468                };
469                clusters.push((cluster, avg_sim));
470            }
471        }
472
473        // Create triggers for significant clusters
474        for (memory_ids, avg_similarity) in clusters {
475            let centroid_id = memory_ids.first().cloned().unwrap_or_default();
476            let cluster_size = memory_ids.len();
477
478            // Store cluster for tracking (marked as triggered since we're returning it)
479            self.semantic_clusters.push(SemanticCluster {
480                memory_ids: memory_ids.clone(),
481                centroid_memory_id: centroid_id.clone(),
482                avg_similarity,
483                formed_at: Utc::now(),
484                triggered: true, // Already consumed - trigger returned immediately
485            });
486
487            triggers.push(ReplayTrigger::SemanticCluster {
488                memory_ids,
489                centroid_id,
490                avg_similarity,
491                cluster_size,
492            });
493        }
494
495        triggers
496    }
497
498    /// Check for behavioral pattern change
499    ///
500    /// Call when user behavior signals are detected (from feedback system).
501    pub fn check_behavioral_change(
502        &mut self,
503        new_topic: Option<&str>,
504        new_project: Option<&str>,
505        correction_detected: bool,
506        correction_keywords: &[String],
507    ) -> Option<ReplayTrigger> {
508        let now = Utc::now();
509
510        // Check for topic switch
511        if let Some(topic) = new_topic {
512            let old_topic = self.behavioral_context.current_topic.clone();
513            if let Some(ref old) = old_topic {
514                if old != topic {
515                    let affected = self.find_memories_by_topic(old);
516                    self.behavioral_context.current_topic = Some(topic.to_string());
517
518                    if !affected.is_empty() {
519                        return Some(ReplayTrigger::BehavioralPatternChange {
520                            change_type: BehaviorChangeType::TopicSwitch {
521                                from: old.clone(),
522                                to: topic.to_string(),
523                            },
524                            affected_memory_ids: affected,
525                            context: format!("Switched from {} to {}", old, topic),
526                        });
527                    }
528                }
529            } else {
530                self.behavioral_context.current_topic = Some(topic.to_string());
531            }
532        }
533
534        // Check for project switch
535        if let Some(project) = new_project {
536            if self.behavioral_context.current_project.as_deref() != Some(project) {
537                let old_project = self.behavioral_context.current_project.clone();
538                self.behavioral_context.current_project = Some(project.to_string());
539
540                if old_project.is_some() {
541                    let affected = self.find_memories_by_project(old_project.as_deref());
542                    if !affected.is_empty() {
543                        return Some(ReplayTrigger::BehavioralPatternChange {
544                            change_type: BehaviorChangeType::ProjectSwitch {
545                                from: old_project,
546                                to: Some(project.to_string()),
547                            },
548                            affected_memory_ids: affected,
549                            context: format!("Switched to project: {}", project),
550                        });
551                    }
552                }
553            }
554        }
555
556        // Check for user correction
557        if correction_detected && !correction_keywords.is_empty() {
558            // Only trigger if not too recent
559            let should_trigger = self
560                .behavioral_context
561                .last_correction
562                .map(|t| (now - t).num_hours() >= BEHAVIORAL_PATTERN_WINDOW_HOURS)
563                .unwrap_or(true);
564
565            if should_trigger {
566                self.behavioral_context.last_correction = Some(now);
567                let affected = self.find_memories_by_keywords(correction_keywords);
568
569                if !affected.is_empty() {
570                    return Some(ReplayTrigger::BehavioralPatternChange {
571                        change_type: BehaviorChangeType::UserCorrection {
572                            correction_keywords: correction_keywords.to_vec(),
573                        },
574                        affected_memory_ids: affected,
575                        context: format!("User correction: {}", correction_keywords.join(", ")),
576                    });
577                }
578            }
579        }
580
581        None
582    }
583
584    /// Get recent pattern detection statistics
585    pub fn stats(&self) -> PatternDetectorStats {
586        PatternDetectorStats {
587            recent_memories_tracked: self.recent_memories.len(),
588            entity_patterns_tracked: self.entity_patterns.len(),
589            semantic_clusters_tracked: self.semantic_clusters.len(),
590            salience_spikes_tracked: self.salience_spikes.len(),
591            importance_moving_avg: self.importance_moving_avg,
592            last_detection: self.last_detection,
593        }
594    }
595
596    /// Clear processed patterns to prevent memory growth
597    ///
598    /// Removes patterns that have triggered replay (consumed).
599    /// This is event-based cleanup, not time-based, aligning with the
600    /// neuroscience-inspired philosophy: patterns persist until processed.
601    pub fn cleanup(&mut self) {
602        // Remove entity patterns that have triggered
603        self.entity_patterns.retain(|_, stats| !stats.triggered);
604
605        // Remove semantic clusters that have triggered
606        self.semantic_clusters.retain(|c| !c.triggered);
607
608        // Remove salience spikes that have triggered
609        self.salience_spikes.retain(|s| !s.triggered);
610    }
611
612    // =========================================================================
613    // Private Helper Methods
614    // =========================================================================
615
616    /// Create a canonical key for an entity group
617    fn entity_group_key(&self, entities: &[String]) -> String {
618        let mut sorted: Vec<_> = entities.iter().map(|e| e.to_lowercase()).collect();
619        sorted.sort();
620        sorted.join("|")
621    }
622
623    /// Update temporal cluster tracking
624    fn update_temporal_cluster(&mut self, memory: &PatternMemory) {
625        let now = memory.created_at;
626        let window = Duration::seconds(TEMPORAL_CLUSTER_WINDOW_SECS);
627
628        match &mut self.current_temporal_cluster {
629            Some(cluster) => {
630                // Check if memory falls within current cluster window
631                if now - cluster.session_end <= window {
632                    cluster.memory_ids.push(memory.id.clone());
633                    cluster.session_end = now;
634                    if memory.session_id.is_some() && cluster.session_id.is_none() {
635                        cluster.session_id = memory.session_id.clone();
636                    }
637                } else {
638                    // Start new cluster
639                    self.current_temporal_cluster = Some(TemporalCluster {
640                        memory_ids: vec![memory.id.clone()],
641                        session_start: now,
642                        session_end: now,
643                        session_id: memory.session_id.clone(),
644                    });
645                }
646            }
647            None => {
648                self.current_temporal_cluster = Some(TemporalCluster {
649                    memory_ids: vec![memory.id.clone()],
650                    session_start: now,
651                    session_end: now,
652                    session_id: memory.session_id.clone(),
653                });
654            }
655        }
656    }
657
658    /// Detect entity co-occurrence patterns
659    fn detect_entity_patterns(&mut self) -> Vec<ReplayTrigger> {
660        let mut triggers = Vec::new();
661        let mut triggered_keys = Vec::new();
662
663        for (key, stats) in &self.entity_patterns {
664            if stats.memory_ids.len() >= MIN_MEMORIES_PER_PATTERN && !stats.triggered {
665                // Calculate confidence based on recency and frequency
666                let recency_factor = stats
667                    .last_seen
668                    .map(|t| {
669                        let age_hours = (Utc::now() - t).num_hours() as f32;
670                        (24.0 - age_hours.min(24.0)) / 24.0
671                    })
672                    .unwrap_or(0.5);
673
674                let frequency_factor =
675                    (stats.total_occurrences as f32 / stats.memory_ids.len() as f32).min(1.0);
676
677                let confidence = recency_factor * 0.6 + frequency_factor * 0.4;
678
679                if confidence >= ENTITY_PATTERN_CONFIDENCE {
680                    let entities: Vec<String> = key.split('|').map(String::from).collect();
681
682                    triggers.push(ReplayTrigger::EntityCoOccurrence {
683                        entities,
684                        memory_ids: stats.memory_ids.clone(),
685                        overlap_score: ENTITY_COOCCURRENCE_THRESHOLD,
686                        confidence,
687                    });
688
689                    triggered_keys.push(key.clone());
690                }
691            }
692        }
693
694        // Mark patterns as triggered (consumed)
695        for key in triggered_keys {
696            if let Some(stats) = self.entity_patterns.get_mut(&key) {
697                stats.triggered = true;
698            }
699        }
700
701        triggers
702    }
703
704    /// Detect temporal cluster trigger
705    fn detect_temporal_cluster(&self) -> Option<ReplayTrigger> {
706        self.current_temporal_cluster.as_ref().and_then(|cluster| {
707            if cluster.memory_ids.len() >= MIN_MEMORIES_PER_SESSION {
708                Some(ReplayTrigger::TemporalCluster {
709                    memory_ids: cluster.memory_ids.clone(),
710                    window_secs: TEMPORAL_CLUSTER_WINDOW_SECS,
711                    session_id: cluster.session_id.clone(),
712                })
713            } else {
714                None
715            }
716        })
717    }
718
719    /// Process pending salience spikes into triggers
720    fn process_salience_spikes(&mut self) -> Vec<ReplayTrigger> {
721        let mut triggers = Vec::new();
722        let mut triggered_indices = Vec::new();
723
724        for (idx, spike) in self.salience_spikes.iter().enumerate() {
725            // Only process spikes not yet triggered and above threshold
726            if !spike.triggered && spike.surprise_factor > SURPRISE_THRESHOLD {
727                triggers.push(ReplayTrigger::SalienceSpike {
728                    memory_id: spike.memory_id.clone(),
729                    content_preview: String::new(), // Would need to look up
730                    importance: spike.importance,
731                    arousal: spike.arousal,
732                    surprise_factor: spike.surprise_factor,
733                });
734                triggered_indices.push(idx);
735            }
736        }
737
738        // Mark spikes as triggered (consumed)
739        for idx in triggered_indices {
740            if let Some(spike) = self.salience_spikes.get_mut(idx) {
741                spike.triggered = true;
742            }
743        }
744
745        triggers
746    }
747
748    /// Find memories related to a topic (simple text match)
749    fn find_memories_by_topic(&self, topic: &str) -> Vec<String> {
750        let topic_lower = topic.to_lowercase();
751        self.recent_memories
752            .iter()
753            .filter(|m| {
754                m.content_preview.to_lowercase().contains(&topic_lower)
755                    || m.entities
756                        .iter()
757                        .any(|e| e.to_lowercase().contains(&topic_lower))
758            })
759            .map(|m| m.id.clone())
760            .collect()
761    }
762
763    /// Find memories by project
764    fn find_memories_by_project(&self, project: Option<&str>) -> Vec<String> {
765        match project {
766            Some(proj) => self
767                .recent_memories
768                .iter()
769                .filter(|m| m.session_id.as_deref() == Some(proj))
770                .map(|m| m.id.clone())
771                .collect(),
772            None => Vec::new(),
773        }
774    }
775
776    /// Find memories by keywords
777    fn find_memories_by_keywords(&self, keywords: &[String]) -> Vec<String> {
778        let keywords_lower: Vec<_> = keywords.iter().map(|k| k.to_lowercase()).collect();
779        self.recent_memories
780            .iter()
781            .filter(|m| {
782                let content_lower = m.content_preview.to_lowercase();
783                keywords_lower.iter().any(|kw| content_lower.contains(kw))
784            })
785            .map(|m| m.id.clone())
786            .collect()
787    }
788}
789
790/// Statistics about pattern detection
791#[derive(Debug, Clone)]
792pub struct PatternDetectorStats {
793    pub recent_memories_tracked: usize,
794    pub entity_patterns_tracked: usize,
795    pub semantic_clusters_tracked: usize,
796    pub salience_spikes_tracked: usize,
797    pub importance_moving_avg: f32,
798    pub last_detection: DateTime<Utc>,
799}
800
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an "Observation" memory created now, with neutral arousal (0.5)
    /// and no session or embedding hash.
    fn make_test_memory(id: &str, entities: Vec<&str>, importance: f32) -> PatternMemory {
        let entity_names: Vec<String> = entities.into_iter().map(str::to_owned).collect();

        PatternMemory {
            id: id.to_string(),
            content_preview: format!("Test memory {}", id),
            entities: entity_names,
            importance,
            arousal: 0.5,
            created_at: Utc::now(),
            embedding_hash: None,
            session_id: None,
            memory_type: "Observation".to_string(),
        }
    }

    /// Three memories sharing the same entity pair should yield an entity pattern.
    #[test]
    fn test_entity_pattern_detection() {
        let mut detector = PatternDetector::new();

        for (id, importance) in [("m1", 0.7), ("m2", 0.6), ("m3", 0.8)] {
            detector.register_memory(make_test_memory(id, vec!["Rust", "HNSW"], importance));
        }

        let outcome = detector.detect_patterns();

        assert!(
            outcome.entity_patterns_found > 0,
            "Should detect entity pattern"
        );
    }

    /// A high-importance, high-arousal memory after a flat baseline should
    /// trigger immediately.
    #[test]
    fn test_salience_spike_detection() {
        let mut detector = PatternDetector::new();

        // Baseline of ten unremarkable memories.
        for i in 0..10 {
            let id = format!("m{}", i);
            detector.register_memory(make_test_memory(&id, vec![], 0.5));
        }

        let spike_memory = PatternMemory {
            id: "spike".to_string(),
            content_preview: "Critical error detected".to_string(),
            entities: vec![],
            importance: 0.95,
            arousal: 0.9,
            created_at: Utc::now(),
            embedding_hash: None,
            session_id: None,
            memory_type: "Error".to_string(),
        };

        let immediate = detector.check_salience_spike(&spike_memory);
        assert!(immediate.is_some(), "Should detect salience spike");
    }

    /// Five memories registered back to back should form a temporal cluster.
    #[test]
    fn test_temporal_cluster() {
        let mut detector = PatternDetector::new();

        for i in 0..5 {
            let id = format!("m{}", i);
            detector.register_memory(make_test_memory(&id, vec![], 0.5));
        }

        let outcome = detector.detect_patterns();

        let has_temporal = outcome
            .triggers
            .iter()
            .any(|t| matches!(t, ReplayTrigger::TemporalCluster { .. }));
        assert!(has_temporal, "Should detect temporal cluster");
    }

    /// Three mutually similar memories should be reported as a semantic cluster.
    #[test]
    fn test_semantic_cluster_detection() {
        let mut detector = PatternDetector::new();

        let similarities = vec![
            ("m1".to_string(), "m2".to_string(), 0.85),
            ("m2".to_string(), "m3".to_string(), 0.82),
            ("m1".to_string(), "m3".to_string(), 0.80),
        ];

        let cluster_triggers = detector.detect_semantic_clusters(&similarities);

        assert!(!cluster_triggers.is_empty(), "Should detect semantic cluster");
    }
}
900}