codex_memory/memory/
reflection_engine.rs

//! Reflection & Insight Generation Engine
//!
//! This module implements a cognitive architecture for generating higher-level insights
//! from accumulated memories through reflection processes. The design is based on:
//!
//! ## Cognitive Science Foundation
//!
//! ### Core Research
//! 1. **Metacognition Theory (Flavell, 1979)**: Thinking about thinking processes
//! 2. **Elaborative Processing (Craik & Lockhart, 1972)**: Deeper processing creates stronger memories
//! 3. **Schema Theory (Bartlett, 1932)**: Knowledge organized in interconnected structures
//! 4. **Constructive Memory (Schacter, 1999)**: Memory reconstruction creates new insights
//! 5. **Knowledge Graph Theory (Semantic Networks)**: Conceptual relationships enable inference
//!
//! ### Reflection Triggers
//! - **Importance Accumulation**: Summed importance of recent memories reaching the
//!   configured threshold (default 150) triggers reflection
//! - **Semantic Clustering**: Dense clusters of related memories
//! - **Temporal Patterns**: Recurring themes over time
//! - **Contradiction Detection**: Conflicting information requiring resolution
//! - **Gap Identification**: Missing knowledge in established schemas
//!
//! ## Architecture Components
//!
//! ### 1. Reflection Trigger System
//! Monitors memory accumulation and identifies when reflection should occur
//!
//! ### 2. Memory Clustering Engine
//! Groups semantically related memories for insight generation
//!
//! ### 3. Insight Generation Pipeline
//! - Pattern Detection: Identifies recurring themes and relationships
//! - Gap Analysis: Finds missing connections in knowledge structures
//! - Synthesis: Combines related concepts into higher-level insights
//! - Validation: Ensures insights are novel and meaningful
//!
//! ### 4. Knowledge Graph Builder
//! Creates and maintains bidirectional relationships between memories and insights
//!
//! ### 5. Meta-Memory Manager
//! Handles insight storage, retrieval, and relationship tracking
41
42use super::error::{MemoryError, Result};
43use super::models::*;
44use super::repository::MemoryRepository;
45use chrono::{DateTime, Duration, Utc};
46use pgvector::Vector;
47use serde::{Deserialize, Serialize};
48use std::collections::{HashMap, HashSet, VecDeque};
49use std::sync::Arc;
50use tracing::{debug, info, warn};
51use uuid::Uuid;
52
53/// Configuration for reflection and insight generation
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct ReflectionConfig {
56    /// Importance threshold that triggers reflection
57    pub importance_trigger_threshold: f64,
58
59    /// Maximum memories to analyze in one reflection session
60    pub max_memories_per_reflection: usize,
61
62    /// Target number of insights per reflection
63    pub target_insights_per_reflection: usize,
64
65    /// Minimum similarity for memory clustering
66    pub clustering_similarity_threshold: f64,
67
68    /// Importance multiplier for generated insights
69    pub insight_importance_multiplier: f64,
70
71    /// Maximum depth for knowledge graph traversal
72    pub max_graph_depth: usize,
73
74    /// Minimum cluster size for insight generation
75    pub min_cluster_size: usize,
76
77    /// Time window for temporal pattern analysis (days)
78    pub temporal_analysis_window_days: i64,
79
80    /// Cooldown period between reflections (hours)
81    pub reflection_cooldown_hours: i64,
82}
83
84impl Default for ReflectionConfig {
85    fn default() -> Self {
86        Self {
87            importance_trigger_threshold: 150.0,
88            max_memories_per_reflection: 100,
89            target_insights_per_reflection: 3,
90            clustering_similarity_threshold: 0.75,
91            insight_importance_multiplier: 1.5,
92            max_graph_depth: 3,
93            min_cluster_size: 3,
94            temporal_analysis_window_days: 30,
95            reflection_cooldown_hours: 6,
96        }
97    }
98}
99
100/// Types of insights that can be generated
101#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
102pub enum InsightType {
103    /// Pattern detected across multiple memories
104    Pattern,
105    /// Synthesis of related concepts
106    Synthesis,
107    /// Gap identified in knowledge structure
108    Gap,
109    /// Contradiction requiring resolution
110    Contradiction,
111    /// Temporal trend or evolution
112    Trend,
113    /// Causal relationship discovered
114    Causality,
115    /// Analogy between disparate concepts
116    Analogy,
117}
118
119/// Insight generation result
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct Insight {
122    pub id: Uuid,
123    pub insight_type: InsightType,
124    pub content: String,
125    pub confidence_score: f64,
126    pub source_memory_ids: Vec<Uuid>,
127    pub related_concepts: Vec<String>,
128    pub knowledge_graph_nodes: Vec<KnowledgeNode>,
129    pub importance_score: f64,
130    pub generated_at: DateTime<Utc>,
131    pub validation_metrics: ValidationMetrics,
132}
133
134/// Metrics for validating insight quality
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct ValidationMetrics {
137    pub novelty_score: f64,
138    pub coherence_score: f64,
139    pub evidence_strength: f64,
140    pub semantic_richness: f64,
141    pub predictive_power: f64,
142}
143
144/// Knowledge graph node representing concepts and relationships
145#[derive(Debug, Clone, Serialize, Deserialize)]
146pub struct KnowledgeNode {
147    pub id: Uuid,
148    pub concept: String,
149    pub node_type: NodeType,
150    #[serde(skip)]
151    pub embedding: Option<Vector>,
152    pub confidence: f64,
153    pub connections: Vec<KnowledgeEdge>,
154    pub created_at: DateTime<Utc>,
155}
156
157/// Types of knowledge nodes
158#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
159pub enum NodeType {
160    Concept,
161    Entity,
162    Relationship,
163    Insight,
164    Memory,
165}
166
167/// Edges in the knowledge graph
168#[derive(Debug, Clone, Serialize, Deserialize)]
169pub struct KnowledgeEdge {
170    pub target_node_id: Uuid,
171    pub relationship_type: RelationshipType,
172    pub strength: f64,
173    pub evidence_memories: Vec<Uuid>,
174}
175
176/// Types of relationships in the knowledge graph
177#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
178pub enum RelationshipType {
179    IsA,
180    PartOf,
181    CausedBy,
182    SimilarTo,
183    ConflictsWith,
184    Enables,
185    Requires,
186    Exemplifies,
187    GeneralizedBy,
188    TemporallyPrecedes,
189}
190
191/// Memory cluster for insight generation
192#[derive(Debug, Clone)]
193pub struct MemoryCluster {
194    pub id: Uuid,
195    pub memories: Vec<Memory>,
196    pub centroid_embedding: Option<Vector>,
197    pub coherence_score: f64,
198    pub dominant_concepts: Vec<String>,
199    pub temporal_span: Option<(DateTime<Utc>, DateTime<Utc>)>,
200}
201
202/// Reflection session state
203#[derive(Debug, Clone)]
204pub struct ReflectionSession {
205    pub id: Uuid,
206    pub started_at: DateTime<Utc>,
207    pub trigger_reason: String,
208    pub analyzed_memories: Vec<Memory>,
209    pub generated_clusters: Vec<MemoryCluster>,
210    pub generated_insights: Vec<Insight>,
211    pub knowledge_graph_updates: Vec<KnowledgeNode>,
212    pub completion_status: ReflectionStatus,
213}
214
215/// Status of reflection session
216#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
217pub enum ReflectionStatus {
218    InProgress,
219    Completed,
220    Failed,
221    Cancelled,
222}
223
224/// Main reflection and insight generation engine
225pub struct ReflectionEngine {
226    config: ReflectionConfig,
227    repository: Arc<MemoryRepository>,
228    #[allow(dead_code)]
229    knowledge_graph: KnowledgeGraph,
230    last_reflection_time: Option<DateTime<Utc>>,
231}
232
233impl ReflectionEngine {
234    pub fn new(config: ReflectionConfig, repository: Arc<MemoryRepository>) -> Self {
235        Self {
236            config,
237            repository,
238            knowledge_graph: KnowledgeGraph::new(),
239            last_reflection_time: None,
240        }
241    }
242
243    /// Check if reflection should be triggered based on accumulated importance
244    pub async fn should_trigger_reflection(&self) -> Result<Option<String>> {
245        // Check cooldown period
246        if let Some(last_time) = self.last_reflection_time {
247            let hours_since_last = Utc::now().signed_duration_since(last_time).num_hours();
248            if hours_since_last < self.config.reflection_cooldown_hours {
249                return Ok(None);
250            }
251        }
252
253        // Calculate total importance since last reflection
254        let cutoff_time = self
255            .last_reflection_time
256            .unwrap_or(Utc::now() - Duration::days(1));
257
258        let recent_memories = self.get_recent_memories_since(cutoff_time).await?;
259        let total_importance: f64 = recent_memories.iter().map(|m| m.importance_score).sum();
260
261        if total_importance >= self.config.importance_trigger_threshold {
262            return Ok(Some(format!(
263                "Importance threshold reached: {:.1} >= {:.1}",
264                total_importance, self.config.importance_trigger_threshold
265            )));
266        }
267
268        // Check for dense semantic clusters
269        if let Some(cluster_reason) = self.check_cluster_density(&recent_memories).await? {
270            return Ok(Some(cluster_reason));
271        }
272
273        // Check for temporal patterns
274        if let Some(pattern_reason) = self.check_temporal_patterns(&recent_memories).await? {
275            return Ok(Some(pattern_reason));
276        }
277
278        Ok(None)
279    }
280
281    /// Execute a complete reflection session
282    pub async fn execute_reflection(
283        &mut self,
284        trigger_reason: String,
285    ) -> Result<ReflectionSession> {
286        let session_id = Uuid::new_v4();
287        let start_time = Utc::now();
288
289        info!(
290            "Starting reflection session {}: {}",
291            session_id, trigger_reason
292        );
293
294        let mut session = ReflectionSession {
295            id: session_id,
296            started_at: start_time,
297            trigger_reason: trigger_reason.clone(),
298            analyzed_memories: Vec::new(),
299            generated_clusters: Vec::new(),
300            generated_insights: Vec::new(),
301            knowledge_graph_updates: Vec::new(),
302            completion_status: ReflectionStatus::InProgress,
303        };
304
305        match self.execute_reflection_pipeline(&mut session).await {
306            Ok(_) => {
307                session.completion_status = ReflectionStatus::Completed;
308                self.last_reflection_time = Some(Utc::now());
309
310                info!(
311                    "Reflection session {} completed: {} insights generated from {} memories",
312                    session_id,
313                    session.generated_insights.len(),
314                    session.analyzed_memories.len()
315                );
316            }
317            Err(e) => {
318                session.completion_status = ReflectionStatus::Failed;
319                warn!("Reflection session {} failed: {}", session_id, e);
320                return Err(e);
321            }
322        }
323
324        Ok(session)
325    }
326
327    /// Main reflection pipeline execution
328    async fn execute_reflection_pipeline(&self, session: &mut ReflectionSession) -> Result<()> {
329        // Step 1: Gather memories for analysis
330        session.analyzed_memories = self.gather_reflection_memories().await?;
331
332        if session.analyzed_memories.is_empty() {
333            return Err(MemoryError::InvalidRequest {
334                message: "No memories available for reflection".to_string(),
335            });
336        }
337
338        // Step 2: Cluster memories by semantic similarity
339        session.generated_clusters = self.cluster_memories(&session.analyzed_memories).await?;
340
341        // Step 3: Generate insights from clusters
342        for cluster in &session.generated_clusters {
343            let cluster_insights = self.generate_cluster_insights(cluster).await?;
344            session.generated_insights.extend(cluster_insights);
345        }
346
347        // Step 4: Cross-cluster insight generation
348        let cross_cluster_insights = self
349            .generate_cross_cluster_insights(&session.generated_clusters)
350            .await?;
351        session.generated_insights.extend(cross_cluster_insights);
352
353        // Step 5: Update knowledge graph
354        session.knowledge_graph_updates = self
355            .update_knowledge_graph(&session.generated_insights)
356            .await?;
357
358        // Step 6: Store insights as meta-memories
359        self.store_insights_as_memories(&session.generated_insights)
360            .await?;
361
362        // Step 7: Validate and prune insights to prevent loops
363        self.validate_and_prune_insights(&mut session.generated_insights)
364            .await?;
365
366        Ok(())
367    }
368
369    /// Gather memories for reflection analysis
370    async fn gather_reflection_memories(&self) -> Result<Vec<Memory>> {
371        let cutoff_time = self
372            .last_reflection_time
373            .unwrap_or(Utc::now() - Duration::days(self.config.temporal_analysis_window_days));
374
375        // Get recent high-importance memories
376        let search_request = SearchRequest {
377            date_range: Some(DateRange {
378                start: Some(cutoff_time),
379                end: Some(Utc::now()),
380            }),
381            importance_range: Some(RangeFilter {
382                min: Some(0.3),
383                max: None,
384            }),
385            limit: Some(self.config.max_memories_per_reflection as i32),
386            search_type: Some(SearchType::Temporal),
387            ..Default::default()
388        };
389
390        let search_response = self.repository.search_memories(search_request).await?;
391
392        Ok(search_response
393            .results
394            .into_iter()
395            .map(|result| result.memory)
396            .collect())
397    }
398
399    /// Cluster memories by semantic similarity using hierarchical clustering
400    async fn cluster_memories(&self, memories: &[Memory]) -> Result<Vec<MemoryCluster>> {
401        let mut clusters = Vec::new();
402        let mut unassigned_memories: Vec<_> = memories.iter().collect();
403
404        while !unassigned_memories.is_empty() {
405            let seed_memory = unassigned_memories.remove(0);
406            let mut cluster_memories = vec![seed_memory.clone()];
407
408            // Find similar memories for this cluster
409            let mut i = 0;
410            while i < unassigned_memories.len() {
411                let memory = unassigned_memories[i];
412
413                if let (Some(seed_embedding), Some(memory_embedding)) =
414                    (&seed_memory.embedding, &memory.embedding)
415                {
416                    let similarity =
417                        self.calculate_cosine_similarity(seed_embedding, memory_embedding)?;
418
419                    if similarity >= self.config.clustering_similarity_threshold {
420                        cluster_memories.push(memory.clone());
421                        unassigned_memories.remove(i);
422                    } else {
423                        i += 1;
424                    }
425                } else {
426                    i += 1;
427                }
428            }
429
430            // Only create cluster if it meets minimum size requirement
431            if cluster_memories.len() >= self.config.min_cluster_size {
432                let cluster = self.create_memory_cluster(cluster_memories).await?;
433                clusters.push(cluster);
434            }
435        }
436
437        Ok(clusters)
438    }
439
440    /// Create a memory cluster with computed properties
441    async fn create_memory_cluster(&self, memories: Vec<Memory>) -> Result<MemoryCluster> {
442        let cluster_id = Uuid::new_v4();
443
444        // Calculate centroid embedding if available
445        let centroid_embedding = self.calculate_centroid_embedding(&memories)?;
446
447        // Calculate coherence score (average pairwise similarity)
448        let coherence_score = self.calculate_cluster_coherence(&memories)?;
449
450        // Extract dominant concepts (simplified - would use NER/topic modeling in production)
451        let dominant_concepts = self.extract_dominant_concepts(&memories).await?;
452
453        // Calculate temporal span
454        let temporal_span = self.calculate_temporal_span(&memories);
455
456        Ok(MemoryCluster {
457            id: cluster_id,
458            memories,
459            centroid_embedding,
460            coherence_score,
461            dominant_concepts,
462            temporal_span,
463        })
464    }
465
466    /// Generate insights from a single memory cluster
467    async fn generate_cluster_insights(&self, cluster: &MemoryCluster) -> Result<Vec<Insight>> {
468        let mut insights = Vec::new();
469
470        // Pattern detection insight
471        if let Some(pattern_insight) = self.detect_cluster_patterns(cluster).await? {
472            insights.push(pattern_insight);
473        }
474
475        // Synthesis insight
476        if let Some(synthesis_insight) = self.generate_synthesis_insight(cluster).await? {
477            insights.push(synthesis_insight);
478        }
479
480        // Temporal trend insight
481        if let Some(trend_insight) = self.detect_temporal_trends(cluster).await? {
482            insights.push(trend_insight);
483        }
484
485        // Gap analysis insight
486        if let Some(gap_insight) = self.identify_knowledge_gaps(cluster).await? {
487            insights.push(gap_insight);
488        }
489
490        Ok(insights)
491    }
492
493    /// Generate insights across multiple clusters
494    async fn generate_cross_cluster_insights(
495        &self,
496        clusters: &[MemoryCluster],
497    ) -> Result<Vec<Insight>> {
498        let mut insights = Vec::new();
499
500        // Analogy detection between clusters
501        for i in 0..clusters.len() {
502            for j in i + 1..clusters.len() {
503                if let Some(analogy_insight) = self
504                    .detect_cross_cluster_analogies(&clusters[i], &clusters[j])
505                    .await?
506                {
507                    insights.push(analogy_insight);
508                }
509            }
510        }
511
512        // Causal relationship detection
513        let causal_insights = self.detect_causal_relationships(clusters).await?;
514        insights.extend(causal_insights);
515
516        Ok(insights)
517    }
518
519    /// Update the knowledge graph with new insights
520    async fn update_knowledge_graph(&self, insights: &[Insight]) -> Result<Vec<KnowledgeNode>> {
521        let mut new_nodes = Vec::new();
522
523        for insight in insights {
524            // Create node for the insight itself
525            let insight_node = KnowledgeNode {
526                id: insight.id,
527                concept: insight.content.clone(),
528                node_type: NodeType::Insight,
529                embedding: None, // Would generate embedding in production
530                confidence: insight.confidence_score,
531                connections: Vec::new(),
532                created_at: insight.generated_at,
533            };
534
535            new_nodes.push(insight_node);
536
537            // Create nodes for related concepts
538            for concept in &insight.related_concepts {
539                let concept_node = KnowledgeNode {
540                    id: Uuid::new_v4(),
541                    concept: concept.clone(),
542                    node_type: NodeType::Concept,
543                    embedding: None,
544                    confidence: 0.8,
545                    connections: vec![KnowledgeEdge {
546                        target_node_id: insight.id,
547                        relationship_type: RelationshipType::Exemplifies,
548                        strength: 0.9,
549                        evidence_memories: insight.source_memory_ids.clone(),
550                    }],
551                    created_at: Utc::now(),
552                };
553
554                new_nodes.push(concept_node);
555            }
556        }
557
558        Ok(new_nodes)
559    }
560
561    /// Store insights as high-importance meta-memories
562    async fn store_insights_as_memories(&self, insights: &[Insight]) -> Result<()> {
563        for insight in insights {
564            let importance_score =
565                insight.importance_score * self.config.insight_importance_multiplier;
566            let importance_score = importance_score.min(1.0); // Cap at 1.0
567
568            let metadata = serde_json::json!({
569                "insight_type": insight.insight_type,
570                "confidence_score": insight.confidence_score,
571                "source_memory_ids": insight.source_memory_ids,
572                "related_concepts": insight.related_concepts,
573                "validation_metrics": insight.validation_metrics,
574                "is_meta_memory": true,
575                "generated_by": "reflection_engine"
576            });
577
578            let create_request = CreateMemoryRequest {
579                content: insight.content.clone(),
580                embedding: None,                 // Would generate in production
581                tier: Some(MemoryTier::Working), // Start insights in working tier
582                importance_score: Some(importance_score),
583                metadata: Some(metadata),
584                parent_id: None,
585                expires_at: None,
586            };
587
588            match self.repository.create_memory(create_request).await {
589                Ok(memory) => {
590                    debug!("Stored insight {} as memory {}", insight.id, memory.id);
591                }
592                Err(e) => {
593                    warn!("Failed to store insight {} as memory: {}", insight.id, e);
594                }
595            }
596        }
597
598        Ok(())
599    }
600
601    /// Validate insights and prune duplicates/loops to prevent insight inflation
602    async fn validate_and_prune_insights(&self, insights: &mut Vec<Insight>) -> Result<()> {
603        let mut validated_insights = Vec::new();
604        let mut seen_concepts = HashSet::new();
605
606        for insight in insights.iter() {
607            // Check for conceptual novelty
608            let concept_hash = self.hash_insight_concepts(insight);
609            if seen_concepts.contains(&concept_hash) {
610                debug!("Pruning duplicate insight: {}", insight.content);
611                continue;
612            }
613
614            // Validate insight quality
615            if self.validate_insight_quality(insight).await? {
616                seen_concepts.insert(concept_hash);
617                validated_insights.push(insight.clone());
618            } else {
619                debug!("Pruning low-quality insight: {}", insight.content);
620            }
621        }
622
623        *insights = validated_insights;
624        Ok(())
625    }
626
627    // Helper methods for insight generation
628    async fn detect_cluster_patterns(&self, cluster: &MemoryCluster) -> Result<Option<Insight>> {
629        if cluster.memories.len() < self.config.min_cluster_size {
630            return Ok(None);
631        }
632
633        // Analyze patterns in memory content
634        let pattern_frequency = self.analyze_content_patterns(&cluster.memories).await?;
635
636        if let Some((dominant_pattern, frequency)) =
637            pattern_frequency.iter().max_by_key(|(_, freq)| *freq)
638        {
639            if *frequency >= 3 {
640                let confidence_score =
641                    ((*frequency as f64) / cluster.memories.len() as f64).min(1.0);
642
643                if confidence_score >= 0.6 {
644                    let insight = Insight {
645                        id: Uuid::new_v4(),
646                        insight_type: InsightType::Pattern,
647                        content: format!(
648                            "Detected recurring pattern '{}' across {} memories in cluster with {:.1}% frequency",
649                            dominant_pattern,
650                            cluster.memories.len(),
651                            confidence_score * 100.0
652                        ),
653                        confidence_score,
654                        source_memory_ids: cluster.memories.iter().map(|m| m.id).collect(),
655                        related_concepts: vec![dominant_pattern.clone()],
656                        knowledge_graph_nodes: Vec::new(),
657                        importance_score: confidence_score * 0.8,
658                        generated_at: Utc::now(),
659                        validation_metrics: ValidationMetrics {
660                            novelty_score: 0.7,
661                            coherence_score: confidence_score,
662                            evidence_strength: confidence_score,
663                            semantic_richness: 0.6,
664                            predictive_power: 0.5,
665                        },
666                    };
667
668                    return Ok(Some(insight));
669                }
670            }
671        }
672
673        Ok(None)
674    }
675
676    async fn generate_synthesis_insight(&self, cluster: &MemoryCluster) -> Result<Option<Insight>> {
677        if cluster.dominant_concepts.len() < 2 {
678            return Ok(None);
679        }
680
681        // Synthesize concepts from the cluster
682        let synthesis_content = format!(
683            "Synthesis of {} related memories reveals connections between concepts: {}. This cluster shows coherence of {:.2} and spans memories from {} sources.",
684            cluster.memories.len(),
685            cluster.dominant_concepts.join(", "),
686            cluster.coherence_score,
687            cluster.memories.len()
688        );
689
690        let importance_score = cluster.coherence_score * 0.9;
691        let confidence_score = cluster.coherence_score;
692
693        let insight = Insight {
694            id: Uuid::new_v4(),
695            insight_type: InsightType::Synthesis,
696            content: synthesis_content,
697            confidence_score,
698            source_memory_ids: cluster.memories.iter().map(|m| m.id).collect(),
699            related_concepts: cluster.dominant_concepts.clone(),
700            knowledge_graph_nodes: Vec::new(),
701            importance_score,
702            generated_at: Utc::now(),
703            validation_metrics: ValidationMetrics {
704                novelty_score: 0.6,
705                coherence_score: cluster.coherence_score,
706                evidence_strength: (cluster.memories.len() as f64 / 10.0).min(1.0),
707                semantic_richness: (cluster.dominant_concepts.len() as f64 / 5.0).min(1.0),
708                predictive_power: 0.6,
709            },
710        };
711
712        Ok(Some(insight))
713    }
714
715    async fn detect_temporal_trends(&self, cluster: &MemoryCluster) -> Result<Option<Insight>> {
716        if let Some((start_time, end_time)) = cluster.temporal_span {
717            let duration = end_time.signed_duration_since(start_time);
718
719            if duration.num_hours() > 24 {
720                let trend_content = format!(
721                    "Temporal trend detected: {} related memories occurred over {} days, suggesting sustained engagement with topic involving: {}",
722                    cluster.memories.len(),
723                    duration.num_days(),
724                    cluster.dominant_concepts.join(", ")
725                );
726
727                let temporal_density =
728                    cluster.memories.len() as f64 / duration.num_days().max(1) as f64;
729                let confidence_score = (temporal_density / 5.0).min(1.0);
730
731                if confidence_score >= 0.3 {
732                    let insight = Insight {
733                        id: Uuid::new_v4(),
734                        insight_type: InsightType::Trend,
735                        content: trend_content,
736                        confidence_score,
737                        source_memory_ids: cluster.memories.iter().map(|m| m.id).collect(),
738                        related_concepts: cluster.dominant_concepts.clone(),
739                        knowledge_graph_nodes: Vec::new(),
740                        importance_score: confidence_score * 0.7,
741                        generated_at: Utc::now(),
742                        validation_metrics: ValidationMetrics {
743                            novelty_score: 0.5,
744                            coherence_score: cluster.coherence_score,
745                            evidence_strength: confidence_score,
746                            semantic_richness: 0.4,
747                            predictive_power: 0.8,
748                        },
749                    };
750
751                    return Ok(Some(insight));
752                }
753            }
754        }
755
756        Ok(None)
757    }
758
759    async fn identify_knowledge_gaps(&self, cluster: &MemoryCluster) -> Result<Option<Insight>> {
760        // Analyze cluster for potential knowledge gaps
761        // This is a simplified heuristic-based approach
762
763        if cluster.memories.len() >= 5 && cluster.coherence_score < 0.6 {
764            // Low coherence in a large cluster might indicate missing connections
765            let gap_content = format!(
766                "Potential knowledge gap identified: {} memories about {} show low coherence ({:.2}), suggesting missing connections or intermediate concepts",
767                cluster.memories.len(),
768                cluster.dominant_concepts.join(", "),
769                cluster.coherence_score
770            );
771
772            let confidence_score = 1.0 - cluster.coherence_score;
773
774            if confidence_score >= 0.4 {
775                let insight = Insight {
776                    id: Uuid::new_v4(),
777                    insight_type: InsightType::Gap,
778                    content: gap_content,
779                    confidence_score,
780                    source_memory_ids: cluster.memories.iter().map(|m| m.id).collect(),
781                    related_concepts: cluster.dominant_concepts.clone(),
782                    knowledge_graph_nodes: Vec::new(),
783                    importance_score: confidence_score * 0.6,
784                    generated_at: Utc::now(),
785                    validation_metrics: ValidationMetrics {
786                        novelty_score: 0.8,
787                        coherence_score: 0.5,
788                        evidence_strength: confidence_score,
789                        semantic_richness: 0.7,
790                        predictive_power: 0.9,
791                    },
792                };
793
794                return Ok(Some(insight));
795            }
796        }
797
798        Ok(None)
799    }
800
801    /// Analyze content patterns in a cluster of memories
802    async fn analyze_content_patterns(
803        &self,
804        memories: &[Memory],
805    ) -> Result<HashMap<String, usize>> {
806        let mut pattern_counts = HashMap::new();
807
808        for memory in memories {
809            // Simple pattern detection based on common words/phrases
810            // In production, this would use NLP techniques
811            let content_lower = memory.content.to_lowercase();
812            let words: Vec<&str> = content_lower
813                .split_whitespace()
814                .filter(|word| word.len() > 4) // Focus on meaningful words
815                .collect();
816
817            for word in words {
818                *pattern_counts.entry(word.to_string()).or_insert(0) += 1;
819            }
820        }
821
822        // Filter out patterns that appear in less than 2 memories
823        pattern_counts.retain(|_pattern, count| *count >= 2);
824
825        Ok(pattern_counts)
826    }
827
828    async fn detect_cross_cluster_analogies(
829        &self,
830        _cluster1: &MemoryCluster,
831        _cluster2: &MemoryCluster,
832    ) -> Result<Option<Insight>> {
833        // Implementation would find analogies between different concept clusters
834        Ok(None)
835    }
836
837    async fn detect_causal_relationships(
838        &self,
839        _clusters: &[MemoryCluster],
840    ) -> Result<Vec<Insight>> {
841        // Implementation would identify causal relationships across clusters
842        Ok(Vec::new())
843    }
844
845    // Utility methods
846    async fn get_recent_memories_since(&self, cutoff_time: DateTime<Utc>) -> Result<Vec<Memory>> {
847        let search_request = SearchRequest {
848            date_range: Some(DateRange {
849                start: Some(cutoff_time),
850                end: Some(Utc::now()),
851            }),
852            search_type: Some(SearchType::Temporal),
853            limit: Some(1000),
854            ..Default::default()
855        };
856
857        let response = self.repository.search_memories(search_request).await?;
858        Ok(response.results.into_iter().map(|r| r.memory).collect())
859    }
860
861    async fn check_cluster_density(&self, _memories: &[Memory]) -> Result<Option<String>> {
862        // Implementation would check for dense semantic clusters
863        Ok(None)
864    }
865
866    async fn check_temporal_patterns(&self, _memories: &[Memory]) -> Result<Option<String>> {
867        // Implementation would check for temporal patterns warranting reflection
868        Ok(None)
869    }
870
871    fn calculate_cosine_similarity(&self, vec1: &Vector, vec2: &Vector) -> Result<f64> {
872        let slice1 = vec1.as_slice();
873        let slice2 = vec2.as_slice();
874
875        if slice1.len() != slice2.len() {
876            return Ok(0.0);
877        }
878
879        let dot_product: f64 = slice1
880            .iter()
881            .zip(slice2.iter())
882            .map(|(a, b)| (*a as f64) * (*b as f64))
883            .sum();
884
885        let norm1: f64 = slice1
886            .iter()
887            .map(|x| (*x as f64).powi(2))
888            .sum::<f64>()
889            .sqrt();
890        let norm2: f64 = slice2
891            .iter()
892            .map(|x| (*x as f64).powi(2))
893            .sum::<f64>()
894            .sqrt();
895
896        if norm1 == 0.0 || norm2 == 0.0 {
897            return Ok(0.0);
898        }
899
900        Ok(dot_product / (norm1 * norm2))
901    }
902
903    fn calculate_centroid_embedding(&self, memories: &[Memory]) -> Result<Option<Vector>> {
904        let embeddings: Vec<_> = memories
905            .iter()
906            .filter_map(|m| m.embedding.as_ref())
907            .collect();
908
909        if embeddings.is_empty() {
910            return Ok(None);
911        }
912
913        let dim = embeddings[0].as_slice().len();
914        let mut centroid = vec![0.0f32; dim];
915
916        for embedding in &embeddings {
917            for (i, &val) in embedding.as_slice().iter().enumerate() {
918                centroid[i] += val;
919            }
920        }
921
922        for val in &mut centroid {
923            *val /= embeddings.len() as f32;
924        }
925
926        Ok(Some(Vector::from(centroid)))
927    }
928
929    fn calculate_cluster_coherence(&self, memories: &[Memory]) -> Result<f64> {
930        let embeddings: Vec<_> = memories
931            .iter()
932            .filter_map(|m| m.embedding.as_ref())
933            .collect();
934
935        if embeddings.len() < 2 {
936            return Ok(1.0);
937        }
938
939        let mut total_similarity = 0.0;
940        let mut pair_count = 0;
941
942        for i in 0..embeddings.len() {
943            for j in i + 1..embeddings.len() {
944                let similarity = self.calculate_cosine_similarity(embeddings[i], embeddings[j])?;
945                total_similarity += similarity;
946                pair_count += 1;
947            }
948        }
949
950        Ok(if pair_count > 0 {
951            total_similarity / pair_count as f64
952        } else {
953            0.0
954        })
955    }
956
957    async fn extract_dominant_concepts(&self, _memories: &[Memory]) -> Result<Vec<String>> {
958        // Simplified implementation - would use NLP/topic modeling in production
959        Ok(vec!["concept1".to_string(), "concept2".to_string()])
960    }
961
962    fn calculate_temporal_span(
963        &self,
964        memories: &[Memory],
965    ) -> Option<(DateTime<Utc>, DateTime<Utc>)> {
966        if memories.is_empty() {
967            return None;
968        }
969
970        let mut min_time = memories[0].created_at;
971        let mut max_time = memories[0].created_at;
972
973        for memory in memories {
974            if memory.created_at < min_time {
975                min_time = memory.created_at;
976            }
977            if memory.created_at > max_time {
978                max_time = memory.created_at;
979            }
980        }
981
982        Some((min_time, max_time))
983    }
984
985    fn hash_insight_concepts(&self, insight: &Insight) -> u64 {
986        use std::collections::hash_map::DefaultHasher;
987        use std::hash::{Hash, Hasher};
988
989        let mut hasher = DefaultHasher::new();
990        insight.related_concepts.hash(&mut hasher);
991        insight.insight_type.hash(&mut hasher);
992        hasher.finish()
993    }
994
995    async fn validate_insight_quality(&self, insight: &Insight) -> Result<bool> {
996        // Validate based on metrics
997        let metrics = &insight.validation_metrics;
998
999        // Minimum thresholds for quality
1000        Ok(metrics.novelty_score > 0.3
1001            && metrics.coherence_score > 0.5
1002            && metrics.evidence_strength > 0.4
1003            && insight.confidence_score > 0.6)
1004    }
1005}
1006
1007/// Knowledge graph management
1008pub struct KnowledgeGraph {
1009    nodes: HashMap<Uuid, KnowledgeNode>,
1010    concept_index: HashMap<String, Vec<Uuid>>,
1011}
1012
1013impl KnowledgeGraph {
1014    pub fn new() -> Self {
1015        Self {
1016            nodes: HashMap::new(),
1017            concept_index: HashMap::new(),
1018        }
1019    }
1020
1021    pub fn add_node(&mut self, node: KnowledgeNode) {
1022        // Index by concept for fast lookup
1023        self.concept_index
1024            .entry(node.concept.clone())
1025            .or_default()
1026            .push(node.id);
1027
1028        self.nodes.insert(node.id, node);
1029    }
1030
1031    pub fn find_related_concepts(&self, concept: &str, max_depth: usize) -> Vec<Uuid> {
1032        let mut related = Vec::new();
1033        let mut visited = HashSet::new();
1034        let mut queue = VecDeque::new();
1035
1036        // Start with direct matches
1037        if let Some(direct_matches) = self.concept_index.get(concept) {
1038            for &node_id in direct_matches {
1039                queue.push_back((node_id, 0));
1040            }
1041        }
1042
1043        // Breadth-first traversal
1044        while let Some((node_id, depth)) = queue.pop_front() {
1045            if depth >= max_depth || visited.contains(&node_id) {
1046                continue;
1047            }
1048
1049            visited.insert(node_id);
1050            related.push(node_id);
1051
1052            if let Some(node) = self.nodes.get(&node_id) {
1053                for edge in &node.connections {
1054                    if !visited.contains(&edge.target_node_id) {
1055                        queue.push_back((edge.target_node_id, depth + 1));
1056                    }
1057                }
1058            }
1059        }
1060
1061        related
1062    }
1063}
1064
1065impl Default for KnowledgeGraph {
1066    fn default() -> Self {
1067        Self::new()
1068    }
1069}
1070
#[cfg(test)]
mod tests {
    use super::*;

    /// The default reflection configuration exposes the expected thresholds.
    #[test]
    fn test_reflection_config_defaults() {
        let defaults = ReflectionConfig::default();

        assert_eq!(defaults.importance_trigger_threshold, 150.0);
        assert_eq!(defaults.target_insights_per_reflection, 3);
        assert_eq!(defaults.insight_importance_multiplier, 1.5);
    }

    /// Adding a node registers it in both the node map and the concept index.
    #[test]
    fn test_knowledge_graph_creation() {
        let node_id = Uuid::new_v4();
        let concept_node = KnowledgeNode {
            id: node_id,
            concept: "test_concept".to_string(),
            node_type: NodeType::Concept,
            embedding: None,
            confidence: 0.8,
            connections: Vec::new(),
            created_at: Utc::now(),
        };

        let mut graph = KnowledgeGraph::new();
        graph.add_node(concept_node);

        assert!(graph.nodes.contains_key(&node_id));
        assert!(graph.concept_index.contains_key("test_concept"));
    }

    /// An insight type survives a JSON serialize/deserialize round trip.
    #[test]
    fn test_insight_type_serialization() {
        let original = InsightType::Pattern;

        let json = serde_json::to_string(&original).unwrap();
        let round_tripped: InsightType = serde_json::from_str(&json).unwrap();

        assert_eq!(original, round_tripped);
    }
}
codex_memory/memory/reflection_engine.rs

codex_memory/memory/
reflection_engine.rs