// shodh_memory/graph_memory.rs
//! Graph Memory System - Inspired by Graphiti
//!
//! Temporal knowledge graph for tracking entities, relationships, and episodic memories.
//! Implements bi-temporal tracking and hybrid retrieval (semantic + graph traversal).
6use anyhow::Result;
7use chrono::{DateTime, Utc};
8use rocksdb::{ColumnFamily, ColumnFamilyDescriptor, Options, WriteBatch, DB};
9use rust_stemmers::{Algorithm, Stemmer};
10use serde::{Deserialize, Serialize};
11use std::cmp::Ordering as CmpOrdering;
12use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
13use std::path::Path;
14use std::sync::atomic::{AtomicUsize, Ordering};
15use std::sync::Arc;
16use uuid::Uuid;
17
18use crate::constants::{
19    ENTITY_CONCEPT_MERGE_THRESHOLD, ENTITY_EMBEDDING_CACHE_MAX, LTP_MIN_STRENGTH, LTP_PRUNE_FLOOR,
20};
21
// Column family names for the unified graph database.
// Each constant names one RocksDB column family (a separate keyspace
// inside the single DB instance opened by this module).
const CF_ENTITIES: &str = "entities";
const CF_RELATIONSHIPS: &str = "relationships";
const CF_EPISODES: &str = "episodes";
const CF_ENTITY_EDGES: &str = "entity_edges";
const CF_ENTITY_PAIR_INDEX: &str = "entity_pair_index";
const CF_ENTITY_EPISODES: &str = "entity_episodes";
const CF_NAME_INDEX: &str = "name_index";
const CF_LOWERCASE_INDEX: &str = "lowercase_index";
const CF_STEMMED_INDEX: &str = "stemmed_index";

// Every column family that must be opened/created together.
// NOTE(review): keep this list in sync with the CF_* constants above —
// a CF missing here will not be opened with the database.
const GRAPH_CF_NAMES: &[&str] = &[
    CF_ENTITIES,
    CF_RELATIONSHIPS,
    CF_EPISODES,
    CF_ENTITY_EDGES,
    CF_ENTITY_PAIR_INDEX,
    CF_ENTITY_EPISODES,
    CF_NAME_INDEX,
    CF_LOWERCASE_INDEX,
    CF_STEMMED_INDEX,
];
44
/// Entity node in the knowledge graph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityNode {
    /// Unique identifier
    pub uuid: Uuid,

    /// Entity name (e.g., "John", "Paris", "Rust programming")
    pub name: String,

    /// Entity labels/types (e.g., ["Person"], ["Location", "City"])
    pub labels: Vec<EntityLabel>,

    /// When this entity was first created in the graph
    pub created_at: DateTime<Utc>,

    /// When this entity was last observed
    pub last_seen_at: DateTime<Utc>,

    /// How many times this entity has been mentioned
    pub mention_count: usize,

    /// Summary of this entity's context (built from surrounding edges)
    pub summary: String,

    /// Additional attributes based on entity type
    pub attributes: HashMap<String, String>,

    /// Semantic embedding of the entity name (for similarity search)
    /// `None` when no embedding has been computed yet.
    pub name_embedding: Option<Vec<f32>>,

    /// Salience score (0.0 - 1.0): How important is this entity?
    /// Higher salience = larger gravitational well in the memory universe
    /// Factors: proper noun status, mention frequency, recency, user-defined importance
    /// Defaults to 0.5 when absent in serialized data (see `default_salience`).
    #[serde(default = "default_salience")]
    pub salience: f32,

    /// Whether this is a proper noun (names, places, products)
    /// Proper nouns have higher base salience than common nouns
    #[serde(default)]
    pub is_proper_noun: bool,
}

/// Serde default for `EntityNode::salience`: neutral middle value so
/// records written before the field existed deserialize cleanly.
fn default_salience() -> f32 {
    0.5 // Default middle salience
}
90
/// Entity labels following Graphiti's categorization
///
/// The closed set of variants covers common named-entity classes;
/// `Other(String)` is the escape hatch for anything unclassified.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum EntityLabel {
    Person,
    Organization,
    Location,
    Technology,
    Concept,
    Event,
    Date,
    Product,
    Skill,
    /// YAKE-extracted discriminative keyword (not a named entity)
    /// Used for graph-based retrieval of rare/important terms like "sunrise"
    Keyword,
    Other(String),
}
108
impl EntityLabel {
    /// Get string representation of the entity label
    ///
    /// Returns the canonical variant name; `Other` yields its inner string.
    /// Borrowed (`&str`) so no allocation occurs.
    #[allow(unused)] // Public API for serialization/display
    pub fn as_str(&self) -> &str {
        match self {
            Self::Person => "Person",
            Self::Organization => "Organization",
            Self::Location => "Location",
            Self::Technology => "Technology",
            Self::Concept => "Concept",
            Self::Event => "Event",
            Self::Date => "Date",
            Self::Product => "Product",
            Self::Skill => "Skill",
            Self::Keyword => "Keyword",
            Self::Other(s) => s.as_str(),
        }
    }
}
128
/// Memory tier for edge consolidation
///
/// Based on hippocampal-cortical memory consolidation research:
/// - L1 (Working): Dense, fast encoding, aggressive pruning (Dentate Gyrus-like)
/// - L2 (Episodic): Moderate density, Hebbian selection (CA1/CA3-like)
/// - L3 (Semantic): Sparse, near-permanent (Neocortex-like)
///
/// New edges start in `L1Working` (the `Default`) and are promoted by
/// `RelationshipEdge::strengthen` when strength crosses the tier threshold.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum EdgeTier {
    /// Working memory tier: new edges, dense, aggressive decay
    #[default]
    L1Working,
    /// Episodic memory tier: proven edges, moderate decay
    L2Episodic,
    /// Semantic memory tier: consolidated edges, near-permanent
    L3Semantic,
}
145
impl EdgeTier {
    /// Get the initial weight for edges in this tier
    /// (the strength an edge is assigned on entering the tier).
    pub fn initial_weight(&self) -> f32 {
        use crate::constants::*;
        match self {
            Self::L1Working => L1_INITIAL_WEIGHT,
            Self::L2Episodic => L2_PROMOTION_WEIGHT,
            Self::L3Semantic => L3_PROMOTION_WEIGHT,
        }
    }

    /// Get the prune threshold for this tier
    /// (edges whose strength falls to/below this become prune candidates).
    pub fn prune_threshold(&self) -> f32 {
        use crate::constants::*;
        match self {
            Self::L1Working => L1_PRUNE_THRESHOLD,
            Self::L2Episodic => L2_PRUNE_THRESHOLD,
            Self::L3Semantic => L3_PRUNE_THRESHOLD,
        }
    }

    /// Get the promotion threshold to move to next tier
    ///
    /// Returns `None` for L3, which has no higher tier.
    pub fn promotion_threshold(&self) -> Option<f32> {
        use crate::constants::*;
        match self {
            Self::L1Working => Some(L1_PROMOTION_THRESHOLD),
            Self::L2Episodic => Some(L2_PROMOTION_THRESHOLD),
            Self::L3Semantic => None, // Already at highest tier
        }
    }

    /// Get the next tier (for promotion); `None` at the top (L3).
    pub fn next_tier(&self) -> Option<Self> {
        match self {
            Self::L1Working => Some(Self::L2Episodic),
            Self::L2Episodic => Some(Self::L3Semantic),
            Self::L3Semantic => None,
        }
    }

    /// Get target density for this tier
    /// (used by consolidation to decide how aggressively to prune).
    pub fn target_density(&self) -> f32 {
        use crate::constants::*;
        match self {
            Self::L1Working => L1_TARGET_DENSITY,
            Self::L2Episodic => L2_TARGET_DENSITY,
            Self::L3Semantic => L3_TARGET_DENSITY,
        }
    }
}
196
/// Long-Term Potentiation status for edges (PIPE-4)
///
/// Multi-scale LTP based on neuroscience research:
/// - Burst: Temporary protection from high-frequency activation (E-LTP)
/// - Weekly: Moderate protection from consistent routine use (L-LTP)
/// - Full: Maximum protection from sustained long-term use (systems consolidation)
///
/// Reference: Frey & Morris (1997) "Synaptic tagging and long-term potentiation"
///
/// `priority()` defines the upgrade ordering: None < Burst < Weekly < Full.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum LtpStatus {
    /// Not potentiated - normal decay applies
    #[default]
    None,

    /// Burst potentiated: 5+ activations in 24 hours
    /// Temporary protection (2x slower decay) that expires after 48h
    /// Represents early-phase LTP (protein synthesis independent)
    Burst {
        /// When burst was detected (for expiration check)
        #[serde(with = "chrono::serde::ts_seconds")]
        detected_at: DateTime<Utc>,
    },

    /// Weekly potentiated: 3+/week for 2+ weeks
    /// Moderate protection (3x slower decay)
    /// Represents late-phase LTP (habit formation)
    Weekly,

    /// Fully potentiated: 10+ activations OR 5+ over 30 days
    /// Maximum protection (10x slower decay)
    /// Represents systems consolidation (semantic memory)
    Full,
}
230
231impl LtpStatus {
232    /// Get the decay factor for this LTP status
233    pub fn decay_factor(&self) -> f32 {
234        use crate::constants::*;
235        match self {
236            Self::None => 1.0,
237            Self::Burst { detected_at } => {
238                // Check if burst has expired
239                let hours_since = (Utc::now() - *detected_at).num_hours();
240                if hours_since > LTP_BURST_DURATION_HOURS {
241                    1.0 // Expired, normal decay
242                } else {
243                    LTP_BURST_DECAY_FACTOR
244                }
245            }
246            Self::Weekly => LTP_WEEKLY_DECAY_FACTOR,
247            Self::Full => LTP_DECAY_FACTOR,
248        }
249    }
250
251    /// Check if this status provides any protection
252    pub fn is_potentiated(&self) -> bool {
253        !matches!(self, Self::None)
254    }
255
256    /// Check if burst protection has expired
257    pub fn is_burst_expired(&self) -> bool {
258        use crate::constants::LTP_BURST_DURATION_HOURS;
259        match self {
260            Self::Burst { detected_at } => {
261                (Utc::now() - *detected_at).num_hours() > LTP_BURST_DURATION_HOURS
262            }
263            _ => false,
264        }
265    }
266
267    /// Get priority for LTP upgrades (higher = stronger protection)
268    pub fn priority(&self) -> u8 {
269        match self {
270            Self::None => 0,
271            Self::Burst { .. } => 1,
272            Self::Weekly => 2,
273            Self::Full => 3,
274        }
275    }
276}
277
/// Relationship edge between entities
///
/// Implements Hebbian synaptic plasticity: "Neurons that fire together, wire together"
/// - Strength increases with co-activation (strengthen method)
/// - Strength decays over time without use (decay method)
/// - Long-Term Potentiation (LTP): After threshold activations, becomes permanent
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipEdge {
    /// Unique identifier for this edge
    pub uuid: Uuid,

    /// Source entity UUID
    pub from_entity: Uuid,

    /// Target entity UUID
    pub to_entity: Uuid,

    /// Type of relationship
    pub relation_type: RelationType,

    /// Confidence/strength of this relationship (0.0 to 1.0)
    /// Dynamic: increases with co-activation, decays without use
    pub strength: f32,

    /// When this relationship was created
    pub created_at: DateTime<Utc>,

    /// When this relationship was last observed (temporal tracking)
    pub valid_at: DateTime<Utc>,

    /// Whether this relationship has been invalidated (temporal edge invalidation)
    /// `None` means the relationship is still considered valid.
    pub invalidated_at: Option<DateTime<Utc>>,

    /// Source episode that created this relationship
    pub source_episode_id: Option<Uuid>,

    /// Additional context about the relationship
    pub context: String,

    // === Hebbian Synaptic Plasticity Fields ===
    /// When this synapse was last activated (used in retrieval/traversal)
    /// Used to calculate time-based decay
    /// Defaults to `Utc::now()` for records serialized before this field existed.
    #[serde(default = "default_last_activated")]
    pub last_activated: DateTime<Utc>,

    /// Number of times both entities were co-accessed (Hebbian co-activation)
    /// Higher count = stronger learned association
    #[serde(default)]
    pub activation_count: u32,

    /// Long-Term Potentiation status (PIPE-4: multi-scale LTP)
    /// Replaces simple bool with tiered protection levels:
    /// - None: Normal decay
    /// - Burst: Temporary 2x protection (5+ activations in 24h)
    /// - Weekly: Moderate 3x protection (3+/week for 2 weeks)
    /// - Full: Maximum 10x protection (10+ activations or 5+ over 30 days)
    #[serde(default)]
    pub ltp_status: LtpStatus,

    /// Memory tier for consolidation (L1→L2→L3)
    /// Edges start in L1 (working memory) and promote based on Hebbian strength
    #[serde(default)]
    pub tier: EdgeTier,

    /// Activation timestamp history for temporal pattern detection (PIPE-4)
    /// Only populated for L2+ edges (L1 edges die too quickly to need history)
    /// Capacity: L2 = 20 timestamps, L3 = 50 timestamps
    /// Enables: burst detection, weekly patterns, temporal query relevance
    #[serde(default)]
    pub activation_timestamps: Option<VecDeque<DateTime<Utc>>>,

    /// Entity extraction confidence (PIPE-5: Unified LTP Readiness)
    /// Average confidence of the entities connected by this edge.
    /// Affects LTP threshold: high confidence → faster LTP (7 activations)
    /// Low confidence → slower LTP (13 activations).
    /// Based on synaptic tagging: behaviorally relevant synapses consolidate faster.
    /// `None` is treated as 0.5 by `adjusted_threshold`/`ltp_readiness`.
    #[serde(default)]
    pub entity_confidence: Option<f32>,
}
357
/// Serde default for `RelationshipEdge::last_activated`: treat a record
/// missing the field as if it were activated at deserialization time,
/// so decay does not immediately penalize legacy data.
fn default_last_activated() -> DateTime<Utc> {
    Utc::now()
}
361
// Hebbian learning constants now imported from crate::constants:
// - LTP_LEARNING_RATE (0.1): η for strength increase per co-activation
// - LTP_DECAY_HALF_LIFE_DAYS (14.0): λ for time-based decay
// - LTP_THRESHOLD (10): Activations needed for Full LTP
// - LTP_DECAY_FACTOR (0.1): Fully potentiated synapses decay 10x slower
// - LTP_MIN_STRENGTH (0.01): Floor to prevent complete forgetting
// PIPE-4 additions:
// - LTP_BURST_THRESHOLD (5): Activations in 24h for burst LTP
// - LTP_BURST_WINDOW_HOURS (24): Window for burst detection
// - LTP_WEEKLY_THRESHOLD (3): Activations per week for weekly LTP
// - LTP_WEEKLY_MIN_WEEKS (2): Minimum weeks of consistent activation
373
374impl RelationshipEdge {
375    /// Strengthen this synapse (Hebbian learning)
376    ///
377    /// Called when both connected entities are accessed together.
378    /// Formula: w_new = w_old + η × (1 - w_old) × co_activation_boost
379    ///
380    /// PIPE-4: Multi-scale LTP detection
381    /// - Records activation timestamps for L2+ edges
382    /// - Detects burst patterns (5+ in 24h) → temporary protection
383    /// - Detects weekly patterns (3+/week for 2 weeks) → moderate protection
384    /// - Detects sustained patterns (10+ total or 5+ over 30 days) → full protection
385    ///
386    /// Also handles tier promotion (L1→L2→L3) when strength exceeds tier threshold.
387    ///
388    /// Returns `Some((old_tier_name, new_tier_name))` if a tier promotion occurred,
389    /// `None` otherwise. This enables the memory-edge coupling: edge promotions
390    /// can signal the memory layer to boost the associated memory's importance.
391    pub fn strengthen(&mut self) -> Option<(String, String)> {
392        use crate::constants::*;
393
394        let now = Utc::now();
395        self.activation_count += 1;
396        self.last_activated = now;
397
398        // PIPE-4: Record activation timestamp for L2+ edges
399        self.record_activation_timestamp(now);
400
401        // Hebbian strengthening with tier-specific boost
402        let tier_boost = match self.tier {
403            EdgeTier::L1Working => TIER_CO_ACCESS_BOOST,
404            EdgeTier::L2Episodic => TIER_CO_ACCESS_BOOST * 0.8,
405            EdgeTier::L3Semantic => TIER_CO_ACCESS_BOOST * 0.5,
406        };
407        let boost = (LTP_LEARNING_RATE + tier_boost) * (1.0 - self.strength);
408        self.strength = (self.strength + boost).min(1.0);
409
410        // PIPE-4: Multi-scale LTP detection (only upgrade, never downgrade)
411        let new_ltp_status = self.detect_ltp_status(now);
412        if new_ltp_status.priority() > self.ltp_status.priority() {
413            let old_status = self.ltp_status;
414            self.ltp_status = new_ltp_status;
415
416            // LTP bonus: immediate strength boost on upgrade
417            let bonus = match new_ltp_status {
418                LtpStatus::Burst { .. } => 0.05,
419                LtpStatus::Weekly => 0.1,
420                LtpStatus::Full => 0.2,
421                LtpStatus::None => 0.0,
422            };
423            self.strength = (self.strength + bonus).min(1.0);
424
425            tracing::debug!(
426                "Edge {} LTP upgrade: {:?} → {:?} (activations: {}, age: {} days)",
427                self.uuid,
428                old_status,
429                self.ltp_status,
430                self.activation_count,
431                (now - self.created_at).num_days()
432            );
433        }
434
435        // Check for burst expiration and potential downgrade
436        if self.ltp_status.is_burst_expired() {
437            // Burst expired - check if weekly pattern has emerged
438            let weekly_check = self.detect_weekly_pattern();
439            if weekly_check {
440                self.ltp_status = LtpStatus::Weekly;
441            } else {
442                self.ltp_status = LtpStatus::None;
443            }
444        }
445
446        // Tier promotion: check if strength exceeds current tier's promotion threshold
447        let mut promotion_result = None;
448        if let Some(threshold) = self.tier.promotion_threshold() {
449            if self.strength >= threshold {
450                if let Some(next_tier) = self.tier.next_tier() {
451                    let old_tier = self.tier;
452                    self.tier = next_tier;
453                    // Preserve strength if already above next tier's initial weight
454                    self.strength = self.strength.max(next_tier.initial_weight());
455
456                    // PIPE-4: Initialize activation_timestamps on L1→L2 promotion
457                    if old_tier == EdgeTier::L1Working {
458                        self.activation_timestamps =
459                            Some(VecDeque::with_capacity(ACTIVATION_HISTORY_L2_CAPACITY));
460                        // Seed with current timestamp
461                        if let Some(ref mut ts) = self.activation_timestamps {
462                            ts.push_back(now);
463                        }
464                    }
465
466                    // Expand capacity on L2→L3 promotion
467                    if old_tier == EdgeTier::L2Episodic {
468                        if let Some(ref mut ts) = self.activation_timestamps {
469                            let current = ts.capacity();
470                            if current < ACTIVATION_HISTORY_L3_CAPACITY {
471                                ts.reserve(ACTIVATION_HISTORY_L3_CAPACITY - current);
472                            }
473                        }
474                    }
475
476                    tracing::debug!(
477                        "Edge {} promoted: {:?} → {:?}",
478                        self.uuid,
479                        old_tier,
480                        self.tier
481                    );
482
483                    promotion_result =
484                        Some((format!("{:?}", old_tier), format!("{:?}", self.tier)));
485                }
486            }
487        }
488
489        // PIPE-5: L3 auto-LTP removed - now handled by unified ltp_readiness()
490        // The readiness formula combines strength + activation count + entity confidence,
491        // ensuring both intensity and repetition evidence are required for Full LTP.
492
493        promotion_result
494    }
495
496    /// Record an activation timestamp (PIPE-4)
497    ///
498    /// Only records for L2+ edges. Maintains capacity limits.
499    fn record_activation_timestamp(&mut self, timestamp: DateTime<Utc>) {
500        use crate::constants::*;
501
502        // L1 edges don't track history (too transient)
503        if matches!(self.tier, EdgeTier::L1Working) {
504            return;
505        }
506
507        // Initialize if needed
508        if self.activation_timestamps.is_none() {
509            let capacity = match self.tier {
510                EdgeTier::L1Working => return,
511                EdgeTier::L2Episodic => ACTIVATION_HISTORY_L2_CAPACITY,
512                EdgeTier::L3Semantic => ACTIVATION_HISTORY_L3_CAPACITY,
513            };
514            self.activation_timestamps = Some(VecDeque::with_capacity(capacity));
515        }
516
517        if let Some(ref mut timestamps) = self.activation_timestamps {
518            let capacity = match self.tier {
519                EdgeTier::L1Working => return,
520                EdgeTier::L2Episodic => ACTIVATION_HISTORY_L2_CAPACITY,
521                EdgeTier::L3Semantic => ACTIVATION_HISTORY_L3_CAPACITY,
522            };
523
524            // Maintain capacity limit (ring buffer behavior)
525            while timestamps.len() >= capacity {
526                timestamps.pop_front();
527            }
528            timestamps.push_back(timestamp);
529        }
530    }
531
    /// Detect LTP status based on unified readiness model (PIPE-4 + PIPE-5)
    ///
    /// PIPE-5 unifies LTP detection into a single readiness score that combines:
    /// - Activation count (repetition path)
    /// - Strength (intensity/durability path)
    /// - Entity confidence (synaptic tagging bonus)
    ///
    /// Multiple paths can lead to Full LTP:
    /// - High repetition alone (15+ activations)
    /// - High intensity alone (0.95+ strength at L3)
    /// - Balanced contribution from both
    /// - High-confidence edges reach threshold ~30% faster
    ///
    /// Temporal patterns (Burst, Weekly) remain separate as they represent
    /// different consolidation mechanisms (E-LTP vs habit formation).
    ///
    /// NOTE: checks are ordered strongest → weakest protection; the first
    /// match wins, so reordering them would change which status is reported.
    fn detect_ltp_status(&self, now: DateTime<Utc>) -> LtpStatus {
        use crate::constants::*;

        // PIPE-5: Unified LTP readiness for Full LTP
        // Combines activation count, strength, and entity confidence
        if self.ltp_readiness() >= LTP_READINESS_THRESHOLD {
            return LtpStatus::Full;
        }

        // Legacy time-aware path: 5+ activations over 30+ days
        // Kept for backward compatibility and edges that survived long decay
        let edge_age_days = (now - self.created_at).num_days();
        if edge_age_days >= LTP_TIME_AWARE_DAYS && self.activation_count >= LTP_TIME_AWARE_THRESHOLD
        {
            return LtpStatus::Full;
        }

        // Check for Weekly LTP (requires timestamp history)
        // Temporal pattern: 3+/week for 2+ weeks indicates habit
        if self.detect_weekly_pattern() {
            return LtpStatus::Weekly;
        }

        // Check for Burst LTP (requires timestamp history)
        // Temporal pattern: 5+ in 24h indicates high immediate interest
        if self.detect_burst_pattern(now) {
            return LtpStatus::Burst { detected_at: now };
        }

        LtpStatus::None
    }
578
579    /// Detect burst pattern: 5+ activations in 24 hours (PIPE-4)
580    fn detect_burst_pattern(&self, now: DateTime<Utc>) -> bool {
581        use crate::constants::*;
582        use chrono::Duration;
583
584        let timestamps = match &self.activation_timestamps {
585            Some(ts) => ts,
586            None => return false,
587        };
588
589        let window_start = now - Duration::hours(LTP_BURST_WINDOW_HOURS);
590        let count_in_window = timestamps.iter().filter(|&&ts| ts >= window_start).count();
591
592        count_in_window >= LTP_BURST_THRESHOLD as usize
593    }
594
595    /// Detect weekly pattern: 3+/week for 2+ weeks (PIPE-4)
596    fn detect_weekly_pattern(&self) -> bool {
597        use crate::constants::*;
598        use chrono::Duration;
599
600        let timestamps = match &self.activation_timestamps {
601            Some(ts) => ts,
602            None => return false,
603        };
604
605        if timestamps.is_empty() {
606            return false;
607        }
608
609        let now = Utc::now();
610        let mut weeks_meeting_threshold = 0u32;
611
612        // Check each of the last LTP_WEEKLY_MIN_WEEKS weeks
613        for week_offset in 0..LTP_WEEKLY_MIN_WEEKS {
614            let week_end = now - Duration::weeks(week_offset as i64);
615            let week_start = week_end - Duration::weeks(1);
616
617            let count_in_week = timestamps
618                .iter()
619                .filter(|&&ts| ts >= week_start && ts < week_end)
620                .count();
621
622            if count_in_week >= LTP_WEEKLY_THRESHOLD as usize {
623                weeks_meeting_threshold += 1;
624            }
625        }
626
627        weeks_meeting_threshold >= LTP_WEEKLY_MIN_WEEKS
628    }
629
630    /// Get activation count within a time window (for temporal retrieval scoring)
631    pub fn activations_in_window(&self, start: DateTime<Utc>, end: DateTime<Utc>) -> usize {
632        match &self.activation_timestamps {
633            Some(ts) => ts.iter().filter(|&&t| t >= start && t <= end).count(),
634            None => 0,
635        }
636    }
637
638    /// Check if edge was active at similar time of day (for temporal retrieval)
639    pub fn time_of_day_match(&self, target_hour: u32, window_hours: u32) -> usize {
640        use chrono::Timelike;
641
642        match &self.activation_timestamps {
643            Some(ts) => ts
644                .iter()
645                .filter(|t| {
646                    let hour = t.hour();
647                    let diff = if hour > target_hour {
648                        (hour - target_hour).min(24 + target_hour - hour)
649                    } else {
650                        (target_hour - hour).min(24 + hour - target_hour)
651                    };
652                    diff <= window_hours
653                })
654                .count(),
655            None => 0,
656        }
657    }
658
    /// Apply time-based decay to this synapse
    ///
    /// Uses tier-aware decay model (3-tier memory consolidation):
    /// - L1 (Working): ~2.9%/hour decay (λ=0.029), max 48 hours before prune
    /// - L2 (Episodic): ~3.1%/day decay (λ=0.031), max 30 days before prune
    /// - L3 (Semantic): ~2%/month decay (λ=0.02/720h), near-permanent
    ///
    /// PIPE-4: Multi-scale LTP protection
    /// - Burst: 2x slower decay (temporary, 48h)
    /// - Weekly: 3x slower decay (habit protection)
    /// - Full: 10x slower decay (permanent protection)
    ///
    /// **Important:** Updates `last_activated` to prevent double-decay on
    /// repeated calls.
    ///
    /// The post-decay steps run in a deliberate order: floor the strength,
    /// downgrade expired bursts, strip LTP from near-zero "zombie" edges,
    /// and only then make the prune decision — reordering them would change
    /// which edges survive.
    ///
    /// Returns true if synapse should be pruned (below tier's threshold)
    pub fn decay(&mut self) -> bool {
        use crate::decay::tier_decay_factor;

        let now = Utc::now();
        let elapsed = now.signed_duration_since(self.last_activated);
        let hours_elapsed = elapsed.num_seconds() as f64 / 3600.0;

        // Clock skew can make last_activated appear to be in the future.
        if hours_elapsed <= 0.0 {
            return false;
        }

        // Cap max decay to protect against clock jumps (max 1 year = 8760 hours)
        let hours_elapsed = hours_elapsed.min(8760.0);

        // Tier-aware decay with PIPE-4 multi-scale LTP
        let tier_num = match self.tier {
            EdgeTier::L1Working => 0,
            EdgeTier::L2Episodic => 1,
            EdgeTier::L3Semantic => 2,
        };
        let ltp_factor = self.ltp_status.decay_factor();
        let (decay_factor, exceeded_max_age) =
            tier_decay_factor(hours_elapsed, tier_num, ltp_factor);
        self.strength *= decay_factor;

        // Update last_activated to prevent double-decay on repeated calls
        self.last_activated = now;

        // Apply floor to prevent complete forgetting
        let prune_threshold = self.tier.prune_threshold();
        if self.strength < LTP_MIN_STRENGTH {
            self.strength = LTP_MIN_STRENGTH;
        }

        // Downgrade expired burst LTP before prune decision
        // decay_factor() already returns 1.0 for expired bursts (correct rate),
        // but is_potentiated() still returns true — preventing pruning
        if self.ltp_status.is_burst_expired() {
            if self.detect_weekly_pattern() {
                self.ltp_status = LtpStatus::Weekly;
            } else {
                self.ltp_status = LtpStatus::None;
            }
        }

        // Strip LTP protection from near-zero edges (zombie edge cleanup)
        // Prevents immortal edges that retain LTP despite negligible strength
        if self.ltp_status.is_potentiated() && self.strength <= LTP_PRUNE_FLOOR {
            self.ltp_status = LtpStatus::None;
        }

        // Return whether this synapse should be pruned
        // Prune if: exceeded max age OR below prune threshold (unless any LTP protection)
        if self.ltp_status.is_potentiated() {
            false
        } else {
            exceeded_max_age || self.strength <= prune_threshold
        }
    }
734
735    /// Get the effective strength considering recency
736    ///
737    /// This is a read-only version that calculates what the strength
738    /// would be after decay, without modifying the edge.
739    /// Uses tier-aware decay (L1/L2/L3 have different decay rates).
740    pub fn effective_strength(&self) -> f32 {
741        use crate::decay::tier_decay_factor;
742
743        let now = Utc::now();
744        let elapsed = now.signed_duration_since(self.last_activated);
745        let hours_elapsed = elapsed.num_seconds() as f64 / 3600.0;
746
747        if hours_elapsed <= 0.0 {
748            return self.strength;
749        }
750
751        let tier_num = match self.tier {
752            EdgeTier::L1Working => 0,
753            EdgeTier::L2Episodic => 1,
754            EdgeTier::L3Semantic => 2,
755        };
756        let ltp_factor = self.ltp_status.decay_factor();
757        let (decay_factor, _) = tier_decay_factor(hours_elapsed, tier_num, ltp_factor);
758        (self.strength * decay_factor).max(LTP_MIN_STRENGTH)
759    }
760
    /// Check if this edge has any LTP protection (for backward compatibility)
    ///
    /// Thin delegate to [`LtpStatus::is_potentiated`]: true for
    /// Burst/Weekly/Full, false for None.
    pub fn is_potentiated(&self) -> bool {
        self.ltp_status.is_potentiated()
    }
765
766    // =========================================================================
767    // PIPE-5: Unified LTP Readiness Model
768    // =========================================================================
769
770    /// Get confidence-adjusted LTP threshold (PIPE-5)
771    ///
772    /// High-confidence edges (strong entity extraction) need fewer activations.
773    /// Low-confidence edges need more activations to prove value.
774    ///
775    /// Returns: threshold in range [LTP_THRESHOLD_MIN, LTP_THRESHOLD_MAX]
776    pub fn adjusted_threshold(&self) -> u32 {
777        use crate::constants::*;
778
779        let confidence = self.entity_confidence.unwrap_or(0.5);
780
781        // Linear interpolation: high confidence → low threshold
782        // confidence 0.0 → threshold_max (13)
783        // confidence 1.0 → threshold_min (7)
784        let range = LTP_THRESHOLD_MAX - LTP_THRESHOLD_MIN;
785        let threshold = LTP_THRESHOLD_MAX as f32 - (confidence * range as f32);
786        threshold.round() as u32
787    }
788
    /// Get tier-specific strength floor for Full LTP (PIPE-5)
    ///
    /// L2 edges have lower floor (still proving themselves).
    /// L3 edges have higher floor (must demonstrate durability).
    /// L1 edges return 1.0 (effectively impossible to reach Full LTP).
    ///
    /// Used as the denominator of the strength term in `ltp_readiness`.
    pub fn strength_floor(&self) -> f32 {
        use crate::constants::*;

        match self.tier {
            EdgeTier::L1Working => 1.0, // L1 can't reach Full LTP via readiness
            EdgeTier::L2Episodic => LTP_STRENGTH_FLOOR_L2,
            EdgeTier::L3Semantic => LTP_STRENGTH_FLOOR_L3,
        }
    }
803
804    /// Calculate LTP readiness score (PIPE-5)
805    ///
806    /// Unified formula combining activation count, strength, and entity confidence:
807    /// - count_score = activation_count / adjusted_threshold
808    /// - strength_score = strength / strength_floor
809    /// - tag_bonus = entity_confidence * TAG_WEIGHT
810    ///
811    /// readiness = count_score * COUNT_WEIGHT + strength_score * STRENGTH_WEIGHT + tag_bonus
812    ///
813    /// Full LTP when readiness >= 1.0
814    ///
815    /// This allows multiple paths to LTP:
816    /// - Repetition-dominant: 15 activations can compensate for lower strength
817    /// - Intensity-dominant: 0.95 strength can compensate for fewer activations
818    /// - Balanced: 10 activations + 0.75 strength + moderate confidence
819    /// - Tagged boost: high-confidence edges reach LTP ~30% faster
820    pub fn ltp_readiness(&self) -> f32 {
821        use crate::constants::*;
822
823        // L1 edges can't reach Full LTP via readiness (too transient)
824        if matches!(self.tier, EdgeTier::L1Working) {
825            return 0.0;
826        }
827
828        let threshold = self.adjusted_threshold() as f32;
829        let floor = self.strength_floor();
830
831        // Count score: how close to activation threshold
832        let count_score = self.activation_count as f32 / threshold;
833
834        // Strength score: how close to strength floor
835        let strength_score = self.strength / floor;
836
837        // Tag bonus: entity confidence provides synaptic tagging advantage
838        let confidence = self.entity_confidence.unwrap_or(0.5);
839        let tag_bonus = confidence * LTP_READINESS_TAG_WEIGHT;
840
841        // Weighted combination
842        count_score * LTP_READINESS_COUNT_WEIGHT
843            + strength_score * LTP_READINESS_STRENGTH_WEIGHT
844            + tag_bonus
845    }
846}
847
/// Relationship types following Graphiti's semantic model
///
/// NOTE(review): this enum derives Serialize/Deserialize and relationships
/// appear to be persisted (see CF_RELATIONSHIPS); if bincode is used for
/// them, variant order affects the on-disk encoding — verify before
/// reordering variants.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum RelationType {
    /// Work relationships
    WorksWith,
    WorksAt,
    EmployedBy,

    /// Structural relationships
    PartOf,
    Contains,
    OwnedBy,

    /// Location relationships
    LocatedIn,
    LocatedAt,

    /// Usage relationships
    Uses,
    CreatedBy,
    DevelopedBy,

    /// Causal relationships
    Causes,
    ResultsIn,

    /// Learning relationships
    Learned,
    Knows,
    Teaches,

    /// Generic relationships
    RelatedTo,
    AssociatedWith,

    /// Memory co-retrieval (Hebbian association between memories)
    CoRetrieved,

    /// Sentence co-occurrence (entities appearing in same sentence)
    /// Key for multi-hop: "Melanie" <-> "sunrise" when "Melanie painted a sunrise"
    CoOccurs,

    /// Custom relationship
    Custom(String),
}
893
894impl RelationType {
895    /// Get string representation of the relation type
896    #[allow(unused)] // Public API for serialization/display
897    pub fn as_str(&self) -> &str {
898        match self {
899            Self::WorksWith => "WorksWith",
900            Self::WorksAt => "WorksAt",
901            Self::EmployedBy => "EmployedBy",
902            Self::PartOf => "PartOf",
903            Self::Contains => "Contains",
904            Self::OwnedBy => "OwnedBy",
905            Self::LocatedIn => "LocatedIn",
906            Self::LocatedAt => "LocatedAt",
907            Self::Uses => "Uses",
908            Self::CreatedBy => "CreatedBy",
909            Self::DevelopedBy => "DevelopedBy",
910            Self::Causes => "Causes",
911            Self::ResultsIn => "ResultsIn",
912            Self::Learned => "Learned",
913            Self::Knows => "Knows",
914            Self::Teaches => "Teaches",
915            Self::RelatedTo => "RelatedTo",
916            Self::AssociatedWith => "AssociatedWith",
917            Self::CoRetrieved => "CoRetrieved",
918            Self::CoOccurs => "CoOccurs",
919            Self::Custom(s) => s.as_str(),
920        }
921    }
922}
923
/// Episodic node representing a discrete experience/memory
///
/// `valid_at` (event time) and `created_at` (ingestion time) together give
/// the bi-temporal tracking described in the module docs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EpisodicNode {
    /// Unique identifier
    pub uuid: Uuid,

    /// Human-readable name/title
    pub name: String,

    /// Episode content (the actual experience data)
    pub content: String,

    /// When the original event occurred (event time)
    pub valid_at: DateTime<Utc>,

    /// When this was ingested into the system (ingestion time)
    pub created_at: DateTime<Utc>,

    /// Entities extracted from this episode (UUIDs of `EntityNode`s;
    /// presumably the reverse mapping is the entity_episodes CF — verify)
    pub entity_refs: Vec<Uuid>,

    /// Source type
    pub source: EpisodeSource,

    /// Additional metadata
    pub metadata: HashMap<String, String>,
}
951
/// Episode source types
///
/// NOTE(review): derives Serialize/Deserialize; if episodes are
/// bincode-encoded, variant order affects the encoding — avoid reordering.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum EpisodeSource {
    /// Conversational message
    Message,
    /// Ingested document
    Document,
    /// Discrete event
    Event,
    /// Observation
    Observation,
}
960
/// Graph memory storage and operations
///
/// Uses a single RocksDB instance with 9 column families for all graph data.
/// This reduces file descriptor usage from 9 separate DBs to 1 (sharing WAL, MANIFEST, LOCK).
///
/// Constructed via [`GraphMemory::new`], which also performs one-time
/// migrations from the old separate-DB layout and seeds the in-memory
/// indices and counters below.
pub struct GraphMemory {
    /// Unified RocksDB database with column families for entities, relationships,
    /// episodes, and all index tables
    db: Arc<DB>,

    /// In-memory entity name index for fast lookups (loaded from name_index CF)
    entity_name_index: Arc<parking_lot::RwLock<HashMap<String, Uuid>>>,

    /// In-memory lowercase name index for O(1) case-insensitive lookups
    entity_lowercase_index: Arc<parking_lot::RwLock<HashMap<String, Uuid>>>,

    /// In-memory stemmed name index for O(1) linguistic lookups
    /// Key: Porter-stemmed lowercase name, Value: Entity UUID
    entity_stemmed_index: Arc<parking_lot::RwLock<HashMap<String, Uuid>>>,

    // === Atomic counters for O(1) stats (P1 fix) ===
    /// Entity count - initialized from entity_name_index.len(), updated on add
    entity_count: Arc<AtomicUsize>,

    /// Relationship count - initialized on startup, updated on add
    relationship_count: Arc<AtomicUsize>,

    /// Episode count - initialized on startup, updated on add
    episode_count: Arc<AtomicUsize>,

    /// Mutex for serializing synapse updates to prevent race conditions (SHO-64)
    /// Uses parking_lot::Mutex for better performance than std::sync::Mutex
    synapse_update_lock: Arc<parking_lot::Mutex<()>>,

    /// In-memory cache of entity name embeddings for concept merging.
    /// Maps entity UUID → embedding vector. Loaded on startup, updated on add.
    /// Used when string-based dedup (exact/case/stemmed) fails — catches synonyms
    /// like "authentication" ↔ "auth" via cosine similarity.
    /// Bounded by ENTITY_EMBEDDING_CACHE_MAX.
    entity_embedding_cache: Arc<parking_lot::RwLock<Vec<(Uuid, Vec<f32>)>>>,

    /// Edges found below prune threshold during lazy-decay reads.
    /// Flushed as batch deletes on each maintenance cycle (no full scan needed).
    pending_prune: parking_lot::Mutex<Vec<Uuid>>,

    /// Entities that may have become orphaned from pruned edges.
    /// Checked during flush_pending_maintenance().
    pending_orphan_checks: parking_lot::Mutex<Vec<Uuid>>,
}
1008
1009impl GraphMemory {
1010    /// Get a reference to the underlying RocksDB instance (for backup/checkpoint).
1011    pub fn get_db(&self) -> &DB {
1012        &self.db
1013    }
1014
1015    // Column family accessors — cheap HashMap lookups on DB internals
1016    fn entities_cf(&self) -> &ColumnFamily {
1017        self.db
1018            .cf_handle(CF_ENTITIES)
1019            .expect("entities CF must exist")
1020    }
1021    fn relationships_cf(&self) -> &ColumnFamily {
1022        self.db
1023            .cf_handle(CF_RELATIONSHIPS)
1024            .expect("relationships CF must exist")
1025    }
1026    fn episodes_cf(&self) -> &ColumnFamily {
1027        self.db
1028            .cf_handle(CF_EPISODES)
1029            .expect("episodes CF must exist")
1030    }
1031    fn entity_edges_cf(&self) -> &ColumnFamily {
1032        self.db
1033            .cf_handle(CF_ENTITY_EDGES)
1034            .expect("entity_edges CF must exist")
1035    }
1036    fn entity_pair_index_cf(&self) -> &ColumnFamily {
1037        self.db
1038            .cf_handle(CF_ENTITY_PAIR_INDEX)
1039            .expect("entity_pair_index CF must exist")
1040    }
1041    fn entity_episodes_cf(&self) -> &ColumnFamily {
1042        self.db
1043            .cf_handle(CF_ENTITY_EPISODES)
1044            .expect("entity_episodes CF must exist")
1045    }
1046    fn name_index_cf(&self) -> &ColumnFamily {
1047        self.db
1048            .cf_handle(CF_NAME_INDEX)
1049            .expect("name_index CF must exist")
1050    }
1051    fn lowercase_index_cf(&self) -> &ColumnFamily {
1052        self.db
1053            .cf_handle(CF_LOWERCASE_INDEX)
1054            .expect("lowercase_index CF must exist")
1055    }
1056    fn stemmed_index_cf(&self) -> &ColumnFamily {
1057        self.db
1058            .cf_handle(CF_STEMMED_INDEX)
1059            .expect("stemmed_index CF must exist")
1060    }
1061
    /// Create a new graph memory system.
    ///
    /// If `shared_cache` is provided, block-cache reads are charged against the
    /// shared LRU cache (recommended for multi-tenant server mode). When `None`,
    /// a small per-instance cache is created (standalone / test use).
    ///
    /// Startup sequence: open the unified CF database, migrate any pre-CF
    /// separate DBs, load (or migrate) the three in-memory name indices, seed
    /// the O(1) stat counters with one-time O(n) scans, and load the entity
    /// embedding cache for concept merging.
    ///
    /// # Errors
    ///
    /// Returns an error if directory creation, DB open, migration, or index
    /// loading fails.
    pub fn new(path: &Path, shared_cache: Option<&rocksdb::Cache>) -> Result<Self> {
        use crate::constants::ROCKSDB_GRAPH_WRITE_BUFFER_BYTES;

        // All CF data lives under <path>/graph (old layout used <path>/graph_* dirs).
        let graph_path = path.join("graph");
        std::fs::create_dir_all(&graph_path)?;

        let mut opts = Options::default();
        opts.create_if_missing(true);
        opts.create_missing_column_families(true);
        opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
        opts.set_write_buffer_size(ROCKSDB_GRAPH_WRITE_BUFFER_BYTES);
        opts.set_max_write_buffer_number(2);

        // Shared block cache for multi-tenant, small local for standalone/tests.
        use rocksdb::{BlockBasedOptions, Cache};
        let mut block_opts = BlockBasedOptions::default();
        // Declared in the outer scope so the reference produced in the `None`
        // arm outlives the match expression.
        let local_cache;
        let cache = match shared_cache {
            Some(c) => c,
            None => {
                local_cache = Cache::new_lru_cache(8 * 1024 * 1024); // 8MB standalone
                &local_cache
            }
        };
        block_opts.set_block_cache(cache);
        block_opts.set_cache_index_and_filter_blocks(true);
        opts.set_block_based_table_factory(&block_opts);

        // Build column family descriptors — all CFs share the same options
        let cf_descriptors: Vec<ColumnFamilyDescriptor> = GRAPH_CF_NAMES
            .iter()
            .map(|name| ColumnFamilyDescriptor::new(*name, opts.clone()))
            .collect();

        let db = Arc::new(DB::open_cf_descriptors(&opts, &graph_path, cf_descriptors)?);

        // Migrate data from old separate-DB layout if needed
        let migrated = Self::migrate_from_separate_dbs(path, &db)?;
        if migrated > 0 {
            tracing::info!(
                "Migrated {} entries from separate graph DBs to column families",
                migrated
            );
        }

        // Load entity name index from name_index CF (O(n) but faster than deserializing entities)
        // If empty, migrate from entities CF (one-time migration for existing data)
        let entity_name_index = Self::load_or_migrate_name_index(&db)?;

        // Load/migrate lowercase index for O(1) case-insensitive lookup
        let entity_lowercase_index =
            Self::load_or_migrate_lowercase_index(&db, &entity_name_index)?;

        // Load/migrate stemmed index for O(1) linguistic lookup
        let entity_stemmed_index = Self::load_or_migrate_stemmed_index(&db, &entity_name_index)?;

        let entity_count = entity_name_index.len();

        // Count relationships and episodes during startup (one-time cost)
        // This is O(n) at startup, but get_stats() will be O(1) at runtime
        let relationships_cf = db.cf_handle(CF_RELATIONSHIPS).unwrap();
        let episodes_cf = db.cf_handle(CF_EPISODES).unwrap();
        let relationship_count = Self::count_cf_entries(&db, relationships_cf);
        let episode_count = Self::count_cf_entries(&db, episodes_cf);

        // Load entity embedding cache for concept merging
        // Only entities with pre-computed name_embeddings are cached
        let entities_cf = db.cf_handle(CF_ENTITIES).unwrap();
        let entity_embedding_cache =
            Self::load_entity_embedding_cache(&db, entities_cf, &entity_name_index);
        let embedding_cache_size = entity_embedding_cache.len();

        let graph = Self {
            db,
            entity_name_index: Arc::new(parking_lot::RwLock::new(entity_name_index)),
            entity_lowercase_index: Arc::new(parking_lot::RwLock::new(entity_lowercase_index)),
            entity_stemmed_index: Arc::new(parking_lot::RwLock::new(entity_stemmed_index)),
            entity_count: Arc::new(AtomicUsize::new(entity_count)),
            relationship_count: Arc::new(AtomicUsize::new(relationship_count)),
            episode_count: Arc::new(AtomicUsize::new(episode_count)),
            synapse_update_lock: Arc::new(parking_lot::Mutex::new(())),
            entity_embedding_cache: Arc::new(parking_lot::RwLock::new(entity_embedding_cache)),
            pending_prune: parking_lot::Mutex::new(Vec::new()),
            pending_orphan_checks: parking_lot::Mutex::new(Vec::new()),
        };

        // Only log when there is pre-existing data (keeps fresh-DB startup quiet).
        if entity_count > 0 || relationship_count > 0 || episode_count > 0 {
            tracing::info!(
                "Loaded graph with {} entities ({} with embeddings), {} relationships, {} episodes",
                entity_count,
                embedding_cache_size,
                relationship_count,
                episode_count
            );
        }

        Ok(graph)
    }
1165
1166    /// Migrate data from the old separate-DB layout (pre-CF) into column families.
1167    ///
1168    /// Detects old `graph_*` subdirectories, opens them read-only, copies all KV
1169    /// pairs into the corresponding CF, then renames the old directory for rollback safety.
1170    fn migrate_from_separate_dbs(base_path: &Path, db: &DB) -> Result<usize> {
1171        let old_dirs: &[(&str, &str)] = &[
1172            ("graph_entities", CF_ENTITIES),
1173            ("graph_relationships", CF_RELATIONSHIPS),
1174            ("graph_episodes", CF_EPISODES),
1175            ("graph_entity_edges", CF_ENTITY_EDGES),
1176            ("graph_entity_pair_index", CF_ENTITY_PAIR_INDEX),
1177            ("graph_entity_episodes", CF_ENTITY_EPISODES),
1178            ("graph_entity_name_index", CF_NAME_INDEX),
1179            ("graph_entity_lowercase_index", CF_LOWERCASE_INDEX),
1180            ("graph_entity_stemmed_index", CF_STEMMED_INDEX),
1181        ];
1182
1183        let mut total_migrated = 0usize;
1184
1185        for (old_name, cf_name) in old_dirs {
1186            let old_path = base_path.join(old_name);
1187            if !old_path.exists() {
1188                continue;
1189            }
1190
1191            let cf = db.cf_handle(cf_name).unwrap();
1192
1193            // Only migrate if the CF is empty (avoid double migration)
1194            if db
1195                .iterator_cf(cf, rocksdb::IteratorMode::Start)
1196                .next()
1197                .is_some()
1198            {
1199                // CF already has data — just rename the old dir
1200                let renamed = base_path.join(format!("{}.pre_cf_migration", old_name));
1201                if !renamed.exists() {
1202                    let _ = std::fs::rename(&old_path, &renamed);
1203                }
1204                continue;
1205            }
1206
1207            // Open old DB read-only and copy all entries
1208            let old_opts = Options::default();
1209            match DB::open_for_read_only(&old_opts, &old_path, false) {
1210                Ok(old_db) => {
1211                    let mut batch = WriteBatch::default();
1212                    let mut count = 0usize;
1213
1214                    for item in old_db.iterator(rocksdb::IteratorMode::Start) {
1215                        match item {
1216                            Ok((key, value)) => {
1217                                batch.put_cf(cf, &key, &value);
1218                                count += 1;
1219                                // Flush in chunks to limit memory usage
1220                                if count % 10_000 == 0 {
1221                                    db.write(std::mem::take(&mut batch))?;
1222                                    batch = WriteBatch::default();
1223                                }
1224                            }
1225                            Err(e) => {
1226                                tracing::warn!("Error reading from old {}: {}", old_name, e);
1227                                break;
1228                            }
1229                        }
1230                    }
1231
1232                    if count > 0 {
1233                        db.write(batch)?;
1234                    }
1235
1236                    drop(old_db);
1237
1238                    // Rename old directory for rollback safety
1239                    let renamed = base_path.join(format!("{}.pre_cf_migration", old_name));
1240                    if let Err(e) = std::fs::rename(&old_path, &renamed) {
1241                        tracing::warn!(
1242                            "Migrated {} entries from {} but failed to rename: {}",
1243                            count,
1244                            old_name,
1245                            e
1246                        );
1247                    } else {
1248                        tracing::info!(
1249                            "Migrated {} entries from {} to CF '{}'",
1250                            count,
1251                            old_name,
1252                            cf_name
1253                        );
1254                    }
1255
1256                    total_migrated += count;
1257                }
1258                Err(e) => {
1259                    tracing::warn!("Failed to open old DB {} for migration: {}", old_name, e);
1260                }
1261            }
1262        }
1263
1264        Ok(total_migrated)
1265    }
1266
1267    /// Load entity name->UUID index from name_index CF, or migrate from entities CF if empty
1268    fn load_or_migrate_name_index(db: &DB) -> Result<HashMap<String, Uuid>> {
1269        let name_index_cf = db.cf_handle(CF_NAME_INDEX).unwrap();
1270        let entities_cf = db.cf_handle(CF_ENTITIES).unwrap();
1271        let mut index = HashMap::new();
1272
1273        // Try to load from name_index CF first
1274        let iter = db.iterator_cf(name_index_cf, rocksdb::IteratorMode::Start);
1275        for (key, value) in iter.flatten() {
1276            if let (Ok(name), Ok(uuid_bytes)) = (
1277                std::str::from_utf8(&key),
1278                <[u8; 16]>::try_from(value.as_ref()),
1279            ) {
1280                index.insert(name.to_string(), Uuid::from_bytes(uuid_bytes));
1281            }
1282        }
1283
1284        // If name_index CF is empty but entities exist, migrate (one-time operation)
1285        if index.is_empty() {
1286            let entity_iter = db.iterator_cf(entities_cf, rocksdb::IteratorMode::Start);
1287            let mut migrated_count = 0;
1288            for (_, value) in entity_iter.flatten() {
1289                if let Ok(entity) = bincode::serde::decode_from_slice::<EntityNode, _>(
1290                    &value,
1291                    bincode::config::standard(),
1292                )
1293                .map(|(v, _)| v)
1294                {
1295                    // Store in name_index CF: name -> UUID bytes
1296                    db.put_cf(
1297                        name_index_cf,
1298                        entity.name.as_bytes(),
1299                        entity.uuid.as_bytes(),
1300                    )?;
1301                    index.insert(entity.name.clone(), entity.uuid);
1302                    migrated_count += 1;
1303                }
1304            }
1305            if migrated_count > 0 {
1306                tracing::info!("Migrated {} entities to name index CF", migrated_count);
1307            }
1308        }
1309
1310        Ok(index)
1311    }
1312
1313    /// Load lowercase name->UUID index, or migrate from name_index if empty
1314    ///
1315    /// This enables O(1) case-insensitive entity lookup instead of O(n) linear search.
1316    fn load_or_migrate_lowercase_index(
1317        db: &DB,
1318        name_index: &HashMap<String, Uuid>,
1319    ) -> Result<HashMap<String, Uuid>> {
1320        let lowercase_cf = db.cf_handle(CF_LOWERCASE_INDEX).unwrap();
1321        let mut index = HashMap::new();
1322
1323        // Try to load from lowercase_index CF
1324        let iter = db.iterator_cf(lowercase_cf, rocksdb::IteratorMode::Start);
1325        for (key, value) in iter.flatten() {
1326            if let (Ok(name), Ok(uuid_bytes)) = (
1327                std::str::from_utf8(&key),
1328                <[u8; 16]>::try_from(value.as_ref()),
1329            ) {
1330                index.insert(name.to_string(), Uuid::from_bytes(uuid_bytes));
1331            }
1332        }
1333
1334        // If empty but name_index has data, migrate (one-time operation)
1335        if index.is_empty() && !name_index.is_empty() {
1336            for (name, uuid) in name_index {
1337                let lowercase_name = name.to_lowercase();
1338                db.put_cf(lowercase_cf, lowercase_name.as_bytes(), uuid.as_bytes())?;
1339                index.insert(lowercase_name, *uuid);
1340            }
1341            tracing::info!(
1342                "Migrated {} entities to lowercase index CF",
1343                name_index.len()
1344            );
1345        }
1346
1347        Ok(index)
1348    }
1349
1350    /// Load stemmed name->UUID index, or migrate from name_index if empty
1351    ///
1352    /// This enables O(1) linguistic entity lookup: "running" matches "run"
1353    /// Uses Porter2 stemmer for English language stemming.
1354    fn load_or_migrate_stemmed_index(
1355        db: &DB,
1356        name_index: &HashMap<String, Uuid>,
1357    ) -> Result<HashMap<String, Uuid>> {
1358        let stemmed_cf = db.cf_handle(CF_STEMMED_INDEX).unwrap();
1359        let mut index = HashMap::new();
1360
1361        // Try to load from stemmed_index CF
1362        let iter = db.iterator_cf(stemmed_cf, rocksdb::IteratorMode::Start);
1363        for (key, value) in iter.flatten() {
1364            if let (Ok(name), Ok(uuid_bytes)) = (
1365                std::str::from_utf8(&key),
1366                <[u8; 16]>::try_from(value.as_ref()),
1367            ) {
1368                index.insert(name.to_string(), Uuid::from_bytes(uuid_bytes));
1369            }
1370        }
1371
1372        // If empty but name_index has data, migrate (one-time operation)
1373        if index.is_empty() && !name_index.is_empty() {
1374            let stemmer = Stemmer::create(Algorithm::English);
1375            for (name, uuid) in name_index {
1376                let stemmed_name = Self::stem_entity_name(&stemmer, name);
1377                db.put_cf(stemmed_cf, stemmed_name.as_bytes(), uuid.as_bytes())?;
1378                index.insert(stemmed_name, *uuid);
1379            }
1380            tracing::info!("Migrated {} entities to stemmed index CF", name_index.len());
1381        }
1382
1383        Ok(index)
1384    }
1385
1386    /// Stem an entity name for linguistic matching
1387    ///
1388    /// For multi-word names (e.g., "New York City"), stems each word and joins.
1389    /// Returns lowercase stemmed version for consistent matching.
1390    fn stem_entity_name(stemmer: &Stemmer, name: &str) -> String {
1391        name.split_whitespace()
1392            .map(|word| stemmer.stem(&word.to_lowercase()).to_string())
1393            .collect::<Vec<_>>()
1394            .join(" ")
1395    }
1396
1397    /// Count entries in a column family (one-time startup cost)
1398    fn count_cf_entries(db: &DB, cf: &ColumnFamily) -> usize {
1399        db.iterator_cf(cf, rocksdb::IteratorMode::Start).count()
1400    }
1401
1402    /// Load entity embedding cache from persisted entities.
1403    ///
1404    /// Scans entities referenced by the name index and collects those with
1405    /// pre-computed name_embeddings into an in-memory cache for O(n) concept
1406    /// merging during `add_entity()`. Entities without embeddings (pre-upgrade
1407    /// data) are skipped and will gain embeddings on their next mention.
1408    fn load_entity_embedding_cache(
1409        db: &DB,
1410        entities_cf: &ColumnFamily,
1411        name_index: &HashMap<String, Uuid>,
1412    ) -> Vec<(Uuid, Vec<f32>)> {
1413        let mut cache = Vec::with_capacity(ENTITY_EMBEDDING_CACHE_MAX.min(name_index.len()));
1414        for uuid in name_index.values() {
1415            let key = uuid.as_bytes();
1416            if let Ok(Some(value)) = db.get_cf(entities_cf, key) {
1417                if let Ok((entity, _)) = bincode::serde::decode_from_slice::<EntityNode, _>(
1418                    &value,
1419                    bincode::config::standard(),
1420                ) {
1421                    if let Some(emb) = entity.name_embedding {
1422                        cache.push((*uuid, emb));
1423                        if cache.len() >= ENTITY_EMBEDDING_CACHE_MAX {
1424                            break;
1425                        }
1426                    }
1427                }
1428            }
1429        }
1430        cache
1431    }
1432
1433    /// Add or update an entity node
1434    /// Salience is updated using the formula: salience = base_salience * (1 + 0.1 * ln(mention_count))
1435    /// This means frequently mentioned entities grow in salience (gravitational wells get heavier)
1436    ///
1437    /// BUG-002 FIX: Handles crash recovery for orphaned entities/stale indices
1438    pub fn add_entity(&self, mut entity: EntityNode) -> Result<Uuid> {
1439        // Multi-tier dedup pipeline: exact → case-insensitive → stemmed → embedding
1440        // Each tier is faster than the next; short-circuits on first match.
1441
1442        // Tier 1: Exact name match (O(1))
1443        let mut existing_uuid = {
1444            let index = self.entity_name_index.read();
1445            index.get(&entity.name).cloned()
1446        };
1447
1448        // Tier 2: Case-insensitive match (O(1))
1449        if existing_uuid.is_none() {
1450            let lowercase_name = entity.name.to_lowercase();
1451            let index = self.entity_lowercase_index.read();
1452            existing_uuid = index.get(&lowercase_name).cloned();
1453        }
1454
1455        // Tier 3: Stemmed match (O(1)) — "running" matches "run"
1456        // Skip for proper nouns to prevent "Paris" → "pari" merging with "Parison"
1457        if existing_uuid.is_none() && !entity.is_proper_noun {
1458            let stemmer = Stemmer::create(Algorithm::English);
1459            let stemmed_name = Self::stem_entity_name(&stemmer, &entity.name);
1460            let index = self.entity_stemmed_index.read();
1461            existing_uuid = index.get(&stemmed_name).cloned();
1462        }
1463
1464        // Tier 4: Embedding-based concept merge (O(n) over cache)
1465        // Catches synonyms like "authentication" ↔ "auth" that string matching misses.
1466        // Only runs when the entity carries a name_embedding (populated by caller).
1467        if existing_uuid.is_none() {
1468            if let Some(ref new_emb) = entity.name_embedding {
1469                let cache = self.entity_embedding_cache.read();
1470                let mut best_match: Option<(Uuid, f32)> = None;
1471                for (uuid, existing_emb) in cache.iter() {
1472                    let sim = crate::similarity::cosine_similarity(new_emb, existing_emb);
1473                    if sim >= ENTITY_CONCEPT_MERGE_THRESHOLD {
1474                        if best_match.map_or(true, |(_, best_sim)| sim > best_sim) {
1475                            best_match = Some((*uuid, sim));
1476                        }
1477                    }
1478                }
1479                if let Some((matched_uuid, sim)) = best_match {
1480                    tracing::debug!(
1481                        "Concept merge: '{}' matched existing entity {} (cosine={:.3})",
1482                        entity.name,
1483                        matched_uuid,
1484                        sim
1485                    );
1486                    existing_uuid = Some(matched_uuid);
1487                }
1488            }
1489        }
1490
1491        let is_new_entity;
1492        if let Some(uuid) = existing_uuid {
1493            // BUG-002 FIX: Verify entity actually exists in DB (handles stale index)
1494            if let Some(existing) = self.get_entity(&uuid)? {
1495                // Update existing entity — merge into canonical node
1496                entity.uuid = uuid;
1497                entity.mention_count = existing.mention_count + 1;
1498                entity.last_seen_at = Utc::now();
1499                entity.created_at = existing.created_at;
1500                entity.is_proper_noun = existing.is_proper_noun || entity.is_proper_noun;
1501
1502                // Preserve the canonical name (first-seen name wins)
1503                entity.name = existing.name.clone();
1504
1505                // Merge labels: preserve all observed entity types
1506                for label in &existing.labels {
1507                    if !entity.labels.contains(label) {
1508                        entity.labels.push(label.clone());
1509                    }
1510                }
1511
1512                // Preserve existing embedding if the incoming one is None
1513                if entity.name_embedding.is_none() {
1514                    entity.name_embedding = existing.name_embedding;
1515                }
1516
1517                // Update salience with frequency boost
1518                // Formula: salience = base_salience * (1 + 0.1 * ln(mention_count))
1519                // This caps at about 1.3x boost at 20 mentions
1520                let frequency_boost = 1.0 + 0.1 * (entity.mention_count as f32).ln();
1521                entity.salience = (existing.salience * frequency_boost).min(1.0);
1522                is_new_entity = false;
1523            } else {
1524                // BUG-002 FIX: Stale index entry - entity in index but not in DB
1525                tracing::warn!(
1526                    "Stale index entry for entity '{}' (uuid={}), recreating",
1527                    entity.name,
1528                    uuid
1529                );
1530                entity.uuid = Uuid::new_v4();
1531                entity.created_at = Utc::now();
1532                entity.last_seen_at = entity.created_at;
1533                entity.mention_count = 1;
1534                is_new_entity = true;
1535            }
1536        } else {
1537            // Genuinely new entity — no match at any tier
1538            entity.uuid = Uuid::new_v4();
1539            entity.created_at = Utc::now();
1540            entity.last_seen_at = entity.created_at;
1541            entity.mention_count = 1;
1542            is_new_entity = true;
1543        }
1544
1545        // BUG-002 FIX: Write index FIRST, then entity
1546        let lowercase_name = entity.name.to_lowercase();
1547        let stemmer = Stemmer::create(Algorithm::English);
1548        let stemmed_name = Self::stem_entity_name(&stemmer, &entity.name);
1549
1550        // Update in-memory indices
1551        {
1552            let mut index = self.entity_name_index.write();
1553            index.insert(entity.name.clone(), entity.uuid);
1554        }
1555        {
1556            let mut lowercase_index = self.entity_lowercase_index.write();
1557            lowercase_index.insert(lowercase_name.clone(), entity.uuid);
1558        }
1559        // Skip stemmed index for proper nouns to prevent "Paris" → "pari" collisions
1560        if !entity.is_proper_noun {
1561            let mut stemmed_index = self.entity_stemmed_index.write();
1562            stemmed_index.insert(stemmed_name.clone(), entity.uuid);
1563        }
1564
1565        // Update entity embedding cache for future concept merges
1566        if let Some(ref emb) = entity.name_embedding {
1567            let mut cache = self.entity_embedding_cache.write();
1568            if is_new_entity {
1569                cache.push((entity.uuid, emb.clone()));
1570                // Evict oldest entries when cache exceeds the configured maximum.
1571                // Oldest entries (index 0) are typically the least recently mentioned
1572                // since they were loaded at startup or added earliest.
1573                if cache.len() > ENTITY_EMBEDDING_CACHE_MAX {
1574                    let excess = cache.len() - ENTITY_EMBEDDING_CACHE_MAX;
1575                    cache.drain(..excess);
1576                }
1577            } else {
1578                // Update existing entry in cache (embedding may have changed)
1579                if let Some(entry) = cache.iter_mut().find(|(uuid, _)| *uuid == entity.uuid) {
1580                    entry.1 = emb.clone();
1581                }
1582            }
1583        }
1584
1585        // Persist name->UUID mappings
1586        self.db.put_cf(
1587            self.name_index_cf(),
1588            entity.name.as_bytes(),
1589            entity.uuid.as_bytes(),
1590        )?;
1591        self.db.put_cf(
1592            self.lowercase_index_cf(),
1593            lowercase_name.as_bytes(),
1594            entity.uuid.as_bytes(),
1595        )?;
1596        if !entity.is_proper_noun {
1597            self.db.put_cf(
1598                self.stemmed_index_cf(),
1599                stemmed_name.as_bytes(),
1600                entity.uuid.as_bytes(),
1601            )?;
1602        }
1603
1604        // Store entity in database
1605        let key = entity.uuid.as_bytes();
1606        let value = bincode::serde::encode_to_vec(&entity, bincode::config::standard())?;
1607        self.db.put_cf(self.entities_cf(), key, value)?;
1608
1609        // Increment counter only for truly new entities
1610        if is_new_entity {
1611            self.entity_count.fetch_add(1, Ordering::Relaxed);
1612        }
1613
1614        Ok(entity.uuid)
1615    }
1616
1617    /// Get entity by UUID
1618    pub fn get_entity(&self, uuid: &Uuid) -> Result<Option<EntityNode>> {
1619        let key = uuid.as_bytes();
1620        match self.db.get_cf(self.entities_cf(), key)? {
1621            Some(value) => {
1622                let (entity, _): (EntityNode, _) =
1623                    bincode::serde::decode_from_slice(&value, bincode::config::standard())?;
1624                Ok(Some(entity))
1625            }
1626            None => Ok(None),
1627        }
1628    }
1629
1630    /// Delete an entity and all its index entries.
1631    ///
1632    /// Removes the entity from:
1633    /// 1. `entities` CF (primary storage)
1634    /// 2. `entity_name_index` (exact name → UUID)
1635    /// 3. `entity_lowercase_index` (lowercase name → UUID)
1636    /// 4. `entity_stemmed_index` (stemmed name → UUID)
1637    /// 5. `entity_embedding_cache` (in-memory embedding vector)
1638    /// 6. `entity_pair_index` CF (co-occurrence pair entries)
1639    /// 7. Decrements `entity_count`
1640    ///
1641    /// Returns true if the entity existed and was deleted.
1642    pub fn delete_entity(&self, uuid: &Uuid) -> Result<bool> {
1643        let entity = match self.get_entity(uuid)? {
1644            Some(e) => e,
1645            None => return Ok(false),
1646        };
1647
1648        // 1. Remove from entities CF
1649        self.db.delete_cf(self.entities_cf(), uuid.as_bytes())?;
1650
1651        // 2-3-4. Remove from name indices (in-memory + persisted)
1652        let lowercase_name = entity.name.to_lowercase();
1653        let stemmer = Stemmer::create(Algorithm::English);
1654        let stemmed_name = Self::stem_entity_name(&stemmer, &entity.name);
1655
1656        {
1657            let mut index = self.entity_name_index.write();
1658            index.remove(&entity.name);
1659        }
1660        self.db
1661            .delete_cf(self.name_index_cf(), entity.name.as_bytes())?;
1662
1663        {
1664            let mut index = self.entity_lowercase_index.write();
1665            index.remove(&lowercase_name);
1666        }
1667        self.db
1668            .delete_cf(self.lowercase_index_cf(), lowercase_name.as_bytes())?;
1669
1670        {
1671            let mut index = self.entity_stemmed_index.write();
1672            index.remove(&stemmed_name);
1673        }
1674        self.db
1675            .delete_cf(self.stemmed_index_cf(), stemmed_name.as_bytes())?;
1676
1677        // 5. Remove from embedding cache
1678        {
1679            let mut cache = self.entity_embedding_cache.write();
1680            cache.retain(|(id, _)| id != uuid);
1681        }
1682
1683        // 6. Remove entity_pair_index entries (prefix scan)
1684        let prefix = format!("{}:", uuid);
1685        let mut pairs_to_delete = Vec::new();
1686        let iter = self
1687            .db
1688            .prefix_iterator_cf(self.entity_pair_index_cf(), prefix.as_bytes());
1689        for item in iter {
1690            match item {
1691                Ok((key, _)) => {
1692                    let key_str = String::from_utf8_lossy(&key);
1693                    if key_str.starts_with(&prefix) {
1694                        pairs_to_delete.push(key.to_vec());
1695                    } else {
1696                        break;
1697                    }
1698                }
1699                Err(_) => break,
1700            }
1701        }
1702        // Also scan for reverse direction (other_uuid:this_uuid)
1703        let suffix = format!(":{}", uuid);
1704        let iter = self
1705            .db
1706            .iterator_cf(self.entity_pair_index_cf(), rocksdb::IteratorMode::Start);
1707        for item in iter {
1708            match item {
1709                Ok((key, _)) => {
1710                    let key_str = String::from_utf8_lossy(&key);
1711                    if key_str.ends_with(&suffix) {
1712                        pairs_to_delete.push(key.to_vec());
1713                    }
1714                }
1715                Err(_) => break,
1716            }
1717        }
1718        for key in &pairs_to_delete {
1719            self.db.delete_cf(self.entity_pair_index_cf(), key)?;
1720        }
1721
1722        // 7. Decrement counter
1723        self.entity_count.fetch_sub(1, Ordering::Relaxed);
1724
1725        tracing::debug!("Deleted orphaned entity '{}' (uuid={})", entity.name, uuid);
1726        Ok(true)
1727    }
1728
    /// Find entity by name (case-insensitive, O(1) lookup)
    ///
    /// Uses a multi-tier matching strategy:
    /// 1. Exact match (O(1)) - fastest
    /// 2. Case-insensitive match (O(1)) - common case
    /// 3. Stemmed match (O(1)) - "running" matches "run"
    /// 4. Substring match - "York" matches "New York City"
    /// 5. Word-level match - "York" matches "New York"
    ///
    /// Tiers 4-5 are O(n) over the in-memory lowercase index and only run for
    /// queries of at least 3 bytes. When several fuzzy candidates match, the
    /// one with the highest salience wins; ties break toward the shortest name.
    pub fn find_entity_by_name(&self, name: &str) -> Result<Option<EntityNode>> {
        // Tier 1: Exact match (O(1))
        // Lock is released before get_entity so the DB read runs lock-free.
        let uuid = {
            let index = self.entity_name_index.read();
            index.get(name).copied()
        };

        if let Some(uuid) = uuid {
            return self.get_entity(&uuid);
        }

        // Tier 2: Case-insensitive match (O(1))
        let name_lower = name.to_lowercase();
        let uuid = {
            let lowercase_index = self.entity_lowercase_index.read();
            lowercase_index.get(&name_lower).copied()
        };

        if let Some(uuid) = uuid {
            return self.get_entity(&uuid);
        }

        // Tier 3: Stemmed match (O(1)) - "running" matches "run", "conversations" matches "conversation"
        // Note: proper nouns are never inserted into the stemmed index, so
        // this tier only resolves common-noun entities.
        let stemmer = Stemmer::create(Algorithm::English);
        let stemmed_name = Self::stem_entity_name(&stemmer, name);
        let uuid = {
            let stemmed_index = self.entity_stemmed_index.read();
            stemmed_index.get(&stemmed_name).copied()
        };

        if let Some(uuid) = uuid {
            return self.get_entity(&uuid);
        }

        // Tier 4 & 5: Fuzzy matching (O(n) but bounded)
        // Only do fuzzy matching for names >= 3 chars to avoid noise
        // Deterministic: collect ALL matches, pick highest salience (break ties by shortest name)
        // NOTE(review): len() counts bytes, not chars — a 1-char multibyte
        // query can still enter this branch.
        if name.len() >= 3 {
            // This read guard stays held through the salience lookups below;
            // safe because get_entity only touches RocksDB, never this lock.
            let lowercase_index = self.entity_lowercase_index.read();
            let mut candidates: Vec<(Uuid, String)> = Vec::new();

            // Tier 4: Substring match - query is substring of entity
            // e.g., "York" matches "New York City"
            for (entity_name, uuid) in lowercase_index.iter() {
                if entity_name.contains(&name_lower) {
                    candidates.push((*uuid, entity_name.clone()));
                }
            }

            // Tier 5: Word-level match (only if Tier 4 found nothing)
            // A candidate qualifies if any of its words equals or starts with
            // any query word (prefix semantics: "conv" matches "conversation").
            if candidates.is_empty() {
                let query_words: Vec<&str> = name_lower.split_whitespace().collect();
                for (entity_name, uuid) in lowercase_index.iter() {
                    let entity_words: Vec<&str> = entity_name.split_whitespace().collect();
                    for qw in &query_words {
                        if entity_words.iter().any(|ew| ew == qw || ew.starts_with(qw)) {
                            candidates.push((*uuid, entity_name.clone()));
                            break;
                        }
                    }
                }
            }

            // Pick best candidate: highest salience, then shortest name for ties
            // Candidates missing from the DB (stale index) score 0.0 salience.
            if !candidates.is_empty() {
                let mut best: Option<(Uuid, f32, usize)> = None; // (uuid, salience, name_len)
                for (uuid, name) in &candidates {
                    let salience = self.get_entity(uuid)?.map(|e| e.salience).unwrap_or(0.0);
                    match &best {
                        Some((_, best_sal, best_len))
                            if salience > *best_sal
                                || (salience == *best_sal && name.len() < *best_len) =>
                        {
                            best = Some((*uuid, salience, name.len()));
                        }
                        None => {
                            best = Some((*uuid, salience, name.len()));
                        }
                        _ => {}
                    }
                }
                if let Some((uuid, _, _)) = best {
                    return self.get_entity(&uuid);
                }
            }
        }

        Ok(None)
    }
1826
1827    /// Find all entities matching a name with fuzzy matching
1828    ///
1829    /// Returns multiple matches ranked by match quality.
1830    /// Useful for spreading activation across related entities.
1831    pub fn find_entities_fuzzy(&self, name: &str, max_results: usize) -> Result<Vec<EntityNode>> {
1832        let mut results = Vec::new();
1833        let name_lower = name.to_lowercase();
1834
1835        // Skip very short queries
1836        if name.len() < 2 {
1837            return Ok(results);
1838        }
1839
1840        let lowercase_index = self.entity_lowercase_index.read();
1841
1842        // Score and collect matches
1843        let mut scored: Vec<(Uuid, f32)> = Vec::new();
1844
1845        for (entity_name, uuid) in lowercase_index.iter() {
1846            let score = if entity_name == &name_lower {
1847                1.0 // Exact match
1848            } else if entity_name.starts_with(&name_lower) {
1849                0.9 // Prefix match
1850            } else if entity_name.contains(&name_lower) {
1851                0.7 // Substring match
1852            } else {
1853                // Word-level match
1854                let entity_words: Vec<&str> = entity_name.split_whitespace().collect();
1855                let query_words: Vec<&str> = name_lower.split_whitespace().collect();
1856
1857                let mut word_score: f32 = 0.0;
1858                for qw in &query_words {
1859                    for ew in &entity_words {
1860                        if ew == qw {
1861                            word_score += 0.5;
1862                        } else if ew.starts_with(qw) {
1863                            word_score += 0.3;
1864                        }
1865                    }
1866                }
1867                word_score.min(0.6) // Cap word-level score
1868            };
1869
1870            if score > 0.0 {
1871                scored.push((*uuid, score));
1872            }
1873        }
1874
1875        // Sort by score descending
1876        scored.sort_by(|a, b| b.1.total_cmp(&a.1));
1877
1878        // Take top results
1879        for (uuid, _score) in scored.into_iter().take(max_results) {
1880            if let Some(entity) = self.get_entity(&uuid)? {
1881                results.push(entity);
1882            }
1883        }
1884
1885        Ok(results)
1886    }
1887
1888    /// Canonical pair key for the entity-pair index.
1889    /// Uses min/max UUID ordering so A→B and B→A produce the same key.
1890    fn pair_key(entity_a: &Uuid, entity_b: &Uuid) -> String {
1891        if entity_a < entity_b {
1892            format!("{entity_a}:{entity_b}")
1893        } else {
1894            format!("{entity_b}:{entity_a}")
1895        }
1896    }
1897
1898    /// Index an entity pair → edge UUID for O(1) dedup lookups
1899    fn index_entity_pair(&self, entity_a: &Uuid, entity_b: &Uuid, edge_uuid: &Uuid) -> Result<()> {
1900        let key = Self::pair_key(entity_a, entity_b);
1901        self.db.put_cf(
1902            self.entity_pair_index_cf(),
1903            key.as_bytes(),
1904            edge_uuid.as_bytes(),
1905        )?;
1906        Ok(())
1907    }
1908
1909    /// Remove entity pair from the pair index
1910    fn remove_entity_pair_index(&self, entity_a: &Uuid, entity_b: &Uuid) -> Result<()> {
1911        let key = Self::pair_key(entity_a, entity_b);
1912        self.db
1913            .delete_cf(self.entity_pair_index_cf(), key.as_bytes())?;
1914        Ok(())
1915    }
1916
1917    /// Find existing relationship between two entities (either direction)
1918    ///
1919    /// O(1) lookup via entity-pair index, with fallback to linear scan
1920    /// for edges created before the pair index existed (migration path).
1921    pub fn find_relationship_between(
1922        &self,
1923        entity_a: &Uuid,
1924        entity_b: &Uuid,
1925    ) -> Result<Option<RelationshipEdge>> {
1926        // Fast path: O(1) pair index lookup
1927        let key = Self::pair_key(entity_a, entity_b);
1928        if let Some(edge_uuid_bytes) = self
1929            .db
1930            .get_cf(self.entity_pair_index_cf(), key.as_bytes())?
1931        {
1932            if edge_uuid_bytes.len() == 16 {
1933                let edge_uuid = Uuid::from_slice(&edge_uuid_bytes)?;
1934                if let Some(edge) = self.get_relationship(&edge_uuid)? {
1935                    return Ok(Some(edge));
1936                }
1937                // Edge was deleted but pair index is stale — clean up and fall through
1938                let _ = self
1939                    .db
1940                    .delete_cf(self.entity_pair_index_cf(), key.as_bytes());
1941            }
1942        }
1943
1944        // Slow path: linear scan for pre-index edges (backward compatibility)
1945        // This path is only hit for edges created before the pair index existed.
1946        // Once all old edges are either strengthened (which updates the index) or
1947        // pruned, this path becomes dead code.
1948        let edges_a = self.get_entity_relationships(entity_a)?;
1949        for edge in edges_a {
1950            if (edge.from_entity == *entity_a && edge.to_entity == *entity_b)
1951                || (edge.from_entity == *entity_b && edge.to_entity == *entity_a)
1952            {
1953                // Backfill pair index for this legacy edge
1954                let _ = self.index_entity_pair(entity_a, entity_b, &edge.uuid);
1955                return Ok(Some(edge));
1956            }
1957        }
1958        Ok(None)
1959    }
1960
1961    /// Find existing relationship between two entities with a specific relation type.
1962    ///
1963    /// Unlike `find_relationship_between` which returns any edge between the pair,
1964    /// this method only matches edges with the same `RelationType`. This allows
1965    /// multiple semantically distinct edges (e.g. WorksWith + PartOf) between
1966    /// the same entity pair.
1967    pub fn find_relationship_between_typed(
1968        &self,
1969        entity_a: &Uuid,
1970        entity_b: &Uuid,
1971        relation_type: &RelationType,
1972    ) -> Result<Option<RelationshipEdge>> {
1973        let edges = self.get_entity_relationships(entity_a)?;
1974        for edge in edges {
1975            if edge.relation_type == *relation_type
1976                && ((edge.from_entity == *entity_a && edge.to_entity == *entity_b)
1977                    || (edge.from_entity == *entity_b && edge.to_entity == *entity_a))
1978            {
1979                return Ok(Some(edge));
1980            }
1981        }
1982        Ok(None)
1983    }
1984
1985    /// Add a relationship edge (or strengthen existing one)
1986    ///
1987    /// If an edge already exists between the two entities, strengthens it
1988    /// instead of creating a duplicate. This implements proper Hebbian learning:
1989    /// "neurons that fire together, wire together" - repeated co-occurrence
1990    /// strengthens the same synapse rather than creating parallel connections.
1991    pub fn add_relationship(&self, mut edge: RelationshipEdge) -> Result<Uuid> {
1992        // Check for existing relationship between these entities WITH SAME TYPE
1993        // Different relation types (e.g. WorksWith vs PartOf) are distinct edges
1994        if let Some(mut existing) = self.find_relationship_between_typed(
1995            &edge.from_entity,
1996            &edge.to_entity,
1997            &edge.relation_type,
1998        )? {
1999            // Strengthen existing edge instead of creating duplicate
2000            let _ = existing.strengthen();
2001            existing.last_activated = Utc::now();
2002
2003            // Update context if new context is more informative
2004            if edge.context.len() > existing.context.len() {
2005                existing.context = edge.context;
2006            }
2007
2008            // Persist the strengthened edge
2009            let key = existing.uuid.as_bytes();
2010            let value = bincode::serde::encode_to_vec(&existing, bincode::config::standard())?;
2011            self.db.put_cf(self.relationships_cf(), key, value)?;
2012
2013            return Ok(existing.uuid);
2014        }
2015
2016        // No existing edge - create new one
2017        edge.uuid = Uuid::new_v4();
2018        edge.created_at = Utc::now();
2019
2020        // Store relationship
2021        let key = edge.uuid.as_bytes();
2022        let value = bincode::serde::encode_to_vec(&edge, bincode::config::standard())?;
2023        self.db.put_cf(self.relationships_cf(), key, value)?;
2024
2025        // Increment relationship counter
2026        self.relationship_count.fetch_add(1, Ordering::Relaxed);
2027
2028        // Update entity->edges index for both entities
2029        self.index_entity_edge(&edge.from_entity, &edge.uuid)?;
2030        self.index_entity_edge(&edge.to_entity, &edge.uuid)?;
2031
2032        // Update entity-pair index for O(1) dedup lookups
2033        self.index_entity_pair(&edge.from_entity, &edge.to_entity, &edge.uuid)?;
2034
2035        // Insert-time degree pruning: cap edges per entity to prevent O(n²) explosion.
2036        // If either entity exceeds MAX_ENTITY_DEGREE, prune the weakest edges.
2037        // This is the primary defense against graph bloat (132MB for 600KB of content).
2038        self.prune_entity_if_over_degree(&edge.from_entity)?;
2039        self.prune_entity_if_over_degree(&edge.to_entity)?;
2040
2041        Ok(edge.uuid)
2042    }
2043
2044    /// Index an edge for an entity
2045    fn index_entity_edge(&self, entity_uuid: &Uuid, edge_uuid: &Uuid) -> Result<()> {
2046        let key = format!("{entity_uuid}:{edge_uuid}");
2047        self.db
2048            .put_cf(self.entity_edges_cf(), key.as_bytes(), b"1")?;
2049        Ok(())
2050    }
2051
    /// Prune an entity's edges if degree exceeds MAX_ENTITY_DEGREE
    ///
    /// Loads all edges for the entity, sorts by effective strength, and deletes
    /// the weakest edges that exceed the cap. LTP-protected edges are preserved
    /// preferentially (sorted last, so they survive pruning). Note the
    /// protection is preferential, not absolute: if the excess exceeds the
    /// number of unprotected edges, the weakest protected edges are pruned too.
    ///
    /// This is called at insert time to prevent unbounded edge growth.
    /// Cost: an O(d) key-only prefix count on every call (d = entity degree),
    /// plus O(d log d) load-and-sort when pruning actually triggers.
    fn prune_entity_if_over_degree(&self, entity_uuid: &Uuid) -> Result<()> {
        use crate::constants::MAX_ENTITY_DEGREE;

        // Fast path: count index keys without loading/deserializing any edge
        let prefix = format!("{entity_uuid}:");
        let iter = self
            .db
            .prefix_iterator_cf(self.entity_edges_cf(), prefix.as_bytes());

        let mut edge_count = 0usize;
        for (key, _) in iter.flatten() {
            if let Ok(key_str) = std::str::from_utf8(&key) {
                // Prefix iterators can run past the prefix range — stop at the
                // first non-matching key.
                if !key_str.starts_with(&prefix) {
                    break;
                }
                edge_count += 1;
            }
        }

        if edge_count <= MAX_ENTITY_DEGREE {
            return Ok(());
        }

        // Over cap — load all edges, sort, prune weakest
        let all_edges = self.get_entity_relationships(entity_uuid)?;
        if all_edges.len() <= MAX_ENTITY_DEGREE {
            // Race condition guard: another thread (or the lazy on-read
            // pruning in get_entity_relationships) may have trimmed already.
            return Ok(());
        }

        // Snapshot each edge's strength ONCE — effective_strength() is
        // time-dependent, so scoring must happen before sorting.
        // Tuple layout: (uuid, effective strength, LTP-protected)
        let mut scored: Vec<(Uuid, f32, bool)> = all_edges
            .iter()
            .map(|e| {
                let is_protected = e.is_potentiated();
                (e.uuid, e.effective_strength(), is_protected)
            })
            .collect();

        // Sort: unprotected+weak first (pruning candidates), protected+strong last (survivors)
        scored.sort_by(|a, b| {
            // Protected edges sort after unprotected (bool: false < true)
            match a.2.cmp(&b.2) {
                CmpOrdering::Equal => {
                    // Within same protection class, weaker edges first (prune candidates)
                    a.1.total_cmp(&b.1)
                }
                other => other,
            }
        });

        // Prune excess: first N edges in sorted order are weakest/unprotected
        let prune_count = scored.len() - MAX_ENTITY_DEGREE;
        let to_prune: Vec<Uuid> = scored.iter().take(prune_count).map(|s| s.0).collect();

        // Deletion failures are logged but do not abort the batch — a partial
        // prune still reduces the degree.
        for edge_uuid in &to_prune {
            if let Err(e) = self.delete_relationship(edge_uuid) {
                tracing::warn!(
                    edge = %edge_uuid,
                    entity = %entity_uuid,
                    "Failed to prune edge during degree cap: {}",
                    e
                );
            }
        }

        if !to_prune.is_empty() {
            tracing::info!(
                entity = %entity_uuid,
                pruned = to_prune.len(),
                remaining = MAX_ENTITY_DEGREE,
                "Pruned edges exceeding degree cap"
            );
        }

        Ok(())
    }
2138
    /// Get all relationships for an entity
    ///
    /// Thin wrapper over [`Self::get_entity_relationships_limited`] with no
    /// limit: every edge is batch-read via multi_get and returned sorted by
    /// effective strength descending. Use sparingly for high-degree entities
    /// in large graphs, since all edges are loaded and deserialized.
    pub fn get_entity_relationships(&self, entity_uuid: &Uuid) -> Result<Vec<RelationshipEdge>> {
        self.get_entity_relationships_limited(entity_uuid, None)
    }
2146
2147    /// Get relationships for an entity with limit, ordered by effective strength
2148    ///
2149    /// Collects ALL edge UUIDs first, batch-reads them, sorts by effective_strength
2150    /// descending, then returns the top `limit` strongest edges. This ensures
2151    /// traversal and queries always use the most valuable connections.
2152    ///
2153    /// When no limit is specified, returns all edges sorted by strength.
2154    pub fn get_entity_relationships_limited(
2155        &self,
2156        entity_uuid: &Uuid,
2157        limit: Option<usize>,
2158    ) -> Result<Vec<RelationshipEdge>> {
2159        let prefix = format!("{entity_uuid}:");
2160
2161        // Phase 1: Collect ALL edge UUIDs from index (fast prefix scan)
2162        // We must read all to sort by strength — storage order is arbitrary
2163        let mut edge_uuids: Vec<Uuid> = Vec::with_capacity(256);
2164        let iter = self
2165            .db
2166            .prefix_iterator_cf(self.entity_edges_cf(), prefix.as_bytes());
2167
2168        for (key, _) in iter.flatten() {
2169            if let Ok(key_str) = std::str::from_utf8(&key) {
2170                if !key_str.starts_with(&prefix) {
2171                    break;
2172                }
2173
2174                if let Some(edge_uuid_str) = key_str.split(':').nth(1) {
2175                    if let Ok(edge_uuid) = Uuid::parse_str(edge_uuid_str) {
2176                        edge_uuids.push(edge_uuid);
2177                    }
2178                }
2179            }
2180        }
2181
2182        if edge_uuids.is_empty() {
2183            return Ok(Vec::new());
2184        }
2185
2186        // Phase 2: Batch read all edges using multi_get (single RocksDB call)
2187        let keys: Vec<[u8; 16]> = edge_uuids.iter().map(|u| *u.as_bytes()).collect();
2188        let key_refs: Vec<&[u8]> = keys.iter().map(|k| k.as_slice()).collect();
2189
2190        let results = self
2191            .db
2192            .batched_multi_get_cf(self.relationships_cf(), &key_refs, false);
2193
2194        let mut edges = Vec::with_capacity(edge_uuids.len());
2195        for value in results.into_iter().flatten().flatten() {
2196            if let Ok((edge, _)) = bincode::serde::decode_from_slice::<RelationshipEdge, _>(
2197                &value,
2198                bincode::config::standard(),
2199            ) {
2200                edges.push(edge);
2201            }
2202        }
2203
2204        // Phase 3: Sort by effective strength descending (strongest first)
2205        // Snapshot strengths BEFORE sorting — effective_strength() calls Utc::now()
2206        // internally, so repeated calls during sort can return different values,
2207        // violating total ordering (Rust 1.81+ panics on this).
2208        let mut strength_cache: HashMap<Uuid, f32> = HashMap::with_capacity(edges.len());
2209        for edge in &edges {
2210            strength_cache.insert(edge.uuid, edge.effective_strength());
2211        }
2212        edges.sort_by(|a, b| {
2213            let sa = strength_cache.get(&a.uuid).copied().unwrap_or(0.0);
2214            let sb = strength_cache.get(&b.uuid).copied().unwrap_or(0.0);
2215            sb.total_cmp(&sa)
2216        });
2217
2218        // Phase 3.5: Opportunistic pruning — queue edges that have decayed below
2219        // their tier's threshold for batch deletion on next maintenance cycle.
2220        // This replaces the eager full-scan apply_decay() with lazy on-read pruning.
2221        let mut has_prunable = false;
2222        for edge in &edges {
2223            if edge.effective_strength() < edge.tier.prune_threshold()
2224                && !edge.ltp_status.is_potentiated()
2225            {
2226                has_prunable = true;
2227                break;
2228            }
2229        }
2230        if has_prunable {
2231            let mut prune_queue = self.pending_prune.lock();
2232            let mut orphan_queue = self.pending_orphan_checks.lock();
2233            // Prevent unbounded growth — these queues are drained by maintenance,
2234            // but if maintenance hasn't run, cap to avoid increasing lock contention.
2235            if prune_queue.len() > 1000 {
2236                tracing::debug!(
2237                    "Prune queue overflow ({}) — clearing to prevent lock contention",
2238                    prune_queue.len()
2239                );
2240                prune_queue.clear();
2241            }
2242            if orphan_queue.len() > 2000 {
2243                orphan_queue.clear();
2244            }
2245            edges.retain(|edge| {
2246                if edge.effective_strength() < edge.tier.prune_threshold()
2247                    && !edge.ltp_status.is_potentiated()
2248                {
2249                    prune_queue.push(edge.uuid);
2250                    orphan_queue.push(edge.from_entity);
2251                    orphan_queue.push(edge.to_entity);
2252                    false // remove from results
2253                } else {
2254                    true
2255                }
2256            });
2257        }
2258
2259        // Phase 4: Truncate to limit if specified
2260        if let Some(max) = limit {
2261            edges.truncate(max);
2262        }
2263
2264        Ok(edges)
2265    }
2266
2267    /// Calculate edge density for a specific entity (SHO-D5)
2268    ///
2269    /// Returns the number of edges connected to this entity.
2270    /// Used for per-entity density calculation: dense entities use vector search,
2271    /// sparse entities use graph search.
2272    ///
2273    /// This is an O(1) prefix count operation.
2274    pub fn entity_edge_count(&self, entity_uuid: &Uuid) -> Result<usize> {
2275        let prefix = format!("{entity_uuid}:");
2276        let iter = self
2277            .db
2278            .prefix_iterator_cf(self.entity_edges_cf(), prefix.as_bytes());
2279
2280        let mut count = 0;
2281        for (key, _) in iter.flatten() {
2282            if let Ok(key_str) = std::str::from_utf8(&key) {
2283                if !key_str.starts_with(&prefix) {
2284                    break;
2285                }
2286                count += 1;
2287            }
2288        }
2289
2290        Ok(count)
2291    }
2292
2293    /// Calculate average edge density for a set of entities (SHO-D5)
2294    ///
2295    /// Returns the mean number of edges per entity for the given UUIDs.
2296    /// Used to determine optimal retrieval strategy:
2297    /// - Low density (<5 edges): Trust graph search (sparse, high-signal)
2298    /// - High density (>20 edges): Trust vector search (dense, noisy)
2299    ///
2300    /// Returns None if no entities provided.
2301    pub fn entities_average_density(&self, entity_uuids: &[Uuid]) -> Result<Option<f32>> {
2302        if entity_uuids.is_empty() {
2303            return Ok(None);
2304        }
2305
2306        let mut total_edges = 0usize;
2307        for uuid in entity_uuids {
2308            total_edges += self.entity_edge_count(uuid)?;
2309        }
2310
2311        Ok(Some(total_edges as f32 / entity_uuids.len() as f32))
2312    }
2313
2314    /// Calculate edge density per tier for a specific entity (SHO-D5)
2315    ///
2316    /// Returns counts of edges by tier: (L1_count, L2_count, L3_count, LTP_count)
2317    /// Useful for understanding if an entity's graph is consolidated (mostly L3/LTP)
2318    /// or still noisy (mostly L1).
2319    pub fn entity_density_by_tier(
2320        &self,
2321        entity_uuid: &Uuid,
2322    ) -> Result<(usize, usize, usize, usize)> {
2323        let edges = self.get_entity_relationships(entity_uuid)?;
2324
2325        let mut l1_count = 0;
2326        let mut l2_count = 0;
2327        let mut l3_count = 0;
2328        let mut ltp_count = 0;
2329
2330        for edge in edges {
2331            if edge.is_potentiated() {
2332                ltp_count += 1;
2333            } else {
2334                match edge.tier {
2335                    EdgeTier::L1Working => l1_count += 1,
2336                    EdgeTier::L2Episodic => l2_count += 1,
2337                    EdgeTier::L3Semantic => l3_count += 1,
2338                }
2339            }
2340        }
2341
2342        Ok((l1_count, l2_count, l3_count, ltp_count))
2343    }
2344
2345    /// Calculate consolidated ratio for an entity (SHO-D5)
2346    ///
2347    /// Returns the ratio of consolidated edges (L2 + L3 + LTP) to total edges.
2348    /// High ratio (>0.7) = trust graph search, Low ratio (<0.3) = trust vector search.
2349    ///
2350    /// Returns None if entity has no edges.
2351    pub fn entity_consolidation_ratio(&self, entity_uuid: &Uuid) -> Result<Option<f32>> {
2352        let (l1, l2, l3, ltp) = self.entity_density_by_tier(entity_uuid)?;
2353        let total = l1 + l2 + l3 + ltp;
2354
2355        if total == 0 {
2356            return Ok(None);
2357        }
2358
2359        let consolidated = l2 + l3 + ltp;
2360        Ok(Some(consolidated as f32 / total as f32))
2361    }
2362
2363    /// Get relationship by UUID (raw, without decay applied)
2364    pub fn get_relationship(&self, uuid: &Uuid) -> Result<Option<RelationshipEdge>> {
2365        let key = uuid.as_bytes();
2366        match self.db.get_cf(self.relationships_cf(), key)? {
2367            Some(value) => {
2368                let (edge, _): (RelationshipEdge, _) =
2369                    bincode::serde::decode_from_slice(&value, bincode::config::standard())?;
2370                Ok(Some(edge))
2371            }
2372            None => Ok(None),
2373        }
2374    }
2375
2376    /// Get relationship by UUID with effective strength (lazy decay calculation)
2377    ///
2378    /// Returns the edge with strength reflecting time-based decay.
2379    /// This doesn't persist the decay - just calculates what the strength would be.
2380    /// Use this for API responses to show accurate current strength.
2381    pub fn get_relationship_with_effective_strength(
2382        &self,
2383        uuid: &Uuid,
2384    ) -> Result<Option<RelationshipEdge>> {
2385        let key = uuid.as_bytes();
2386        match self.db.get_cf(self.relationships_cf(), key)? {
2387            Some(value) => {
2388                let (mut edge, _): (RelationshipEdge, _) =
2389                    bincode::serde::decode_from_slice(&value, bincode::config::standard())?;
2390                // Apply effective strength calculation (doesn't persist)
2391                edge.strength = edge.effective_strength();
2392                Ok(Some(edge))
2393            }
2394            None => Ok(None),
2395        }
2396    }
2397
2398    /// Delete a relationship by UUID
2399    ///
2400    /// Removes the relationship from storage and decrements the counter.
2401    /// Returns true if the relationship was found and deleted.
2402    pub fn delete_relationship(&self, uuid: &Uuid) -> Result<bool> {
2403        let key = uuid.as_bytes();
2404
2405        // Get the edge first to find both entities for index cleanup
2406        let edge = match self.get_relationship(uuid)? {
2407            Some(e) => e,
2408            None => return Ok(false),
2409        };
2410
2411        // Delete from main storage
2412        self.db.delete_cf(self.relationships_cf(), key)?;
2413        self.relationship_count.fetch_sub(1, Ordering::Relaxed);
2414
2415        // Remove from entity_edges index for BOTH entities
2416        // (add_relationship indexes both from_entity and to_entity)
2417        let from_key = format!("{}:{}", edge.from_entity, uuid);
2418        if let Err(e) = self
2419            .db
2420            .delete_cf(self.entity_edges_cf(), from_key.as_bytes())
2421        {
2422            tracing::warn!(edge = %uuid, key = %from_key, error = %e, "Failed to delete from entity_edges index");
2423        }
2424        let to_key = format!("{}:{}", edge.to_entity, uuid);
2425        if let Err(e) = self.db.delete_cf(self.entity_edges_cf(), to_key.as_bytes()) {
2426            tracing::warn!(edge = %uuid, key = %to_key, error = %e, "Failed to delete from entity_edges index");
2427        }
2428
2429        // Remove from entity-pair index
2430        if let Err(e) = self.remove_entity_pair_index(&edge.from_entity, &edge.to_entity) {
2431            tracing::warn!(edge = %uuid, "Failed to delete from entity_pair index: {}", e);
2432        }
2433
2434        Ok(true)
2435    }
2436
2437    /// Delete an episode and clean up associated indices and orphan edges
2438    ///
2439    /// When a memory is deleted, its corresponding episode should also be removed.
2440    /// This method:
2441    /// 1. Removes the episode from the episodes DB
2442    /// 2. Removes entity_episodes index entries
2443    /// 3. Deletes relationship edges that were sourced from this episode
2444    pub fn delete_episode(&self, episode_uuid: &Uuid) -> Result<bool> {
2445        // Fetch episode to get entity_refs for index cleanup
2446        let episode = match self.get_episode(episode_uuid)? {
2447            Some(ep) => ep,
2448            None => return Ok(false),
2449        };
2450
2451        // Delete episode from main storage
2452        self.db
2453            .delete_cf(self.episodes_cf(), episode_uuid.as_bytes())?;
2454        self.episode_count.fetch_sub(1, Ordering::Relaxed);
2455
2456        // Remove from entity_episodes inverted index
2457        for entity_uuid in &episode.entity_refs {
2458            let key = format!("{entity_uuid}:{episode_uuid}");
2459            if let Err(e) = self.db.delete_cf(self.entity_episodes_cf(), key.as_bytes()) {
2460                tracing::warn!(episode = %episode_uuid, key = %key, error = %e, "Failed to delete from entity_episodes index");
2461            }
2462        }
2463
2464        // Delete edges sourced from this episode
2465        // Scan all relationships for matching source_episode_id
2466        let iter = self
2467            .db
2468            .iterator_cf(self.relationships_cf(), rocksdb::IteratorMode::Start);
2469        let mut edges_to_delete = Vec::new();
2470        for (_, value) in iter.flatten() {
2471            if let Ok((edge, _)) = bincode::serde::decode_from_slice::<RelationshipEdge, _>(
2472                &value,
2473                bincode::config::standard(),
2474            ) {
2475                if edge.source_episode_id == Some(*episode_uuid) {
2476                    edges_to_delete.push(edge.uuid);
2477                }
2478            }
2479        }
2480
2481        for edge_uuid in &edges_to_delete {
2482            if let Err(e) = self.delete_relationship(edge_uuid) {
2483                tracing::debug!("Failed to delete orphan edge {}: {}", edge_uuid, e);
2484            }
2485        }
2486
2487        tracing::debug!(
2488            "Deleted episode {} with {} entity_refs and {} sourced edges",
2489            &episode_uuid.to_string()[..8],
2490            episode.entity_refs.len(),
2491            edges_to_delete.len()
2492        );
2493
2494        Ok(true)
2495    }
2496
2497    /// Clear all graph data (GDPR full erasure)
2498    ///
2499    /// Wipes all entities, relationships, episodes, and all indices.
2500    /// Resets all counters to zero.
2501    /// Returns (entity_count, relationship_count, episode_count) that were cleared.
2502    pub fn clear_all(&self) -> Result<(usize, usize, usize)> {
2503        let entity_count = self.entity_count.load(Ordering::Relaxed);
2504        let relationship_count = self.relationship_count.load(Ordering::Relaxed);
2505        let episode_count = self.episode_count.load(Ordering::Relaxed);
2506
2507        // Clear each column family by iterating and batch-deleting
2508        for cf_name in GRAPH_CF_NAMES {
2509            let cf = self.db.cf_handle(cf_name).unwrap();
2510            let mut batch = rocksdb::WriteBatch::default();
2511            let iter = self.db.iterator_cf(cf, rocksdb::IteratorMode::Start);
2512            for (key, _) in iter.flatten() {
2513                batch.delete_cf(cf, &key);
2514            }
2515            self.db.write(batch)?;
2516        }
2517
2518        // Clear in-memory indices
2519        self.entity_name_index.write().clear();
2520        self.entity_lowercase_index.write().clear();
2521        self.entity_stemmed_index.write().clear();
2522
2523        // Reset counters
2524        self.entity_count.store(0, Ordering::Relaxed);
2525        self.relationship_count.store(0, Ordering::Relaxed);
2526        self.episode_count.store(0, Ordering::Relaxed);
2527
2528        // Drain pending maintenance queues — they reference now-deleted entities/edges
2529        let _ = std::mem::take(&mut *self.pending_prune.lock());
2530        let _ = std::mem::take(&mut *self.pending_orphan_checks.lock());
2531
2532        tracing::info!(
2533            "Graph data cleared (GDPR erasure): {} entities, {} relationships, {} episodes",
2534            entity_count,
2535            relationship_count,
2536            episode_count
2537        );
2538        Ok((entity_count, relationship_count, episode_count))
2539    }
2540
2541    /// Add an episodic node
2542    pub fn add_episode(&self, episode: EpisodicNode) -> Result<Uuid> {
2543        let key = episode.uuid.as_bytes();
2544        let entity_count = episode.entity_refs.len();
2545        tracing::debug!(
2546            "add_episode: {} with {} entity_refs",
2547            &episode.uuid.to_string()[..8],
2548            entity_count
2549        );
2550
2551        let value = bincode::serde::encode_to_vec(&episode, bincode::config::standard())?;
2552        self.db.put_cf(self.episodes_cf(), key, value)?;
2553
2554        // Increment episode counter
2555        let prev = self.episode_count.fetch_add(1, Ordering::Relaxed);
2556        tracing::debug!("add_episode: count {} -> {}", prev, prev + 1);
2557
2558        // Update inverted index: entity_uuid -> episode_uuid
2559        for entity_uuid in &episode.entity_refs {
2560            self.index_entity_episode(entity_uuid, &episode.uuid)?;
2561        }
2562
2563        Ok(episode.uuid)
2564    }
2565
2566    /// Index an episode for an entity (inverted index)
2567    fn index_entity_episode(&self, entity_uuid: &Uuid, episode_uuid: &Uuid) -> Result<()> {
2568        let key = format!("{entity_uuid}:{episode_uuid}");
2569        self.db
2570            .put_cf(self.entity_episodes_cf(), key.as_bytes(), b"1")?;
2571        Ok(())
2572    }
2573
2574    /// Get episode by UUID
2575    pub fn get_episode(&self, uuid: &Uuid) -> Result<Option<EpisodicNode>> {
2576        let key = uuid.as_bytes();
2577        match self.db.get_cf(self.episodes_cf(), key)? {
2578            Some(value) => {
2579                let (episode, _): (EpisodicNode, _) =
2580                    bincode::serde::decode_from_slice(&value, bincode::config::standard())?;
2581                Ok(Some(episode))
2582            }
2583            None => Ok(None),
2584        }
2585    }
2586
2587    /// Get all episodes that contain a specific entity
2588    ///
2589    /// Uses inverted index for O(k) lookup instead of O(n) full scan.
2590    /// Collects episode UUIDs first, then batch-reads them using multi_get.
2591    /// Crucial for spreading activation algorithm.
2592    pub fn get_episodes_by_entity(&self, entity_uuid: &Uuid) -> Result<Vec<EpisodicNode>> {
2593        let prefix = format!("{entity_uuid}:");
2594        tracing::debug!("get_episodes_by_entity: prefix {}", &prefix[..12]);
2595
2596        // Phase 1: Collect episode UUIDs from index (fast prefix scan, no data transfer)
2597        let mut episode_uuids: Vec<Uuid> = Vec::new();
2598        let iter = self
2599            .db
2600            .prefix_iterator_cf(self.entity_episodes_cf(), prefix.as_bytes());
2601        for (key, _) in iter.flatten() {
2602            if let Ok(key_str) = std::str::from_utf8(&key) {
2603                if !key_str.starts_with(&prefix) {
2604                    break;
2605                }
2606                if let Some(episode_uuid_str) = key_str.split(':').nth(1) {
2607                    if let Ok(episode_uuid) = Uuid::parse_str(episode_uuid_str) {
2608                        episode_uuids.push(episode_uuid);
2609                    }
2610                }
2611            }
2612        }
2613
2614        if episode_uuids.is_empty() {
2615            return Ok(Vec::new());
2616        }
2617
2618        // Phase 2: Batch read all episodes using multi_get (single RocksDB call)
2619        let keys: Vec<[u8; 16]> = episode_uuids.iter().map(|u| *u.as_bytes()).collect();
2620        let key_refs: Vec<&[u8]> = keys.iter().map(|k| k.as_slice()).collect();
2621
2622        let results = self
2623            .db
2624            .batched_multi_get_cf(self.episodes_cf(), &key_refs, false);
2625
2626        let mut episodes = Vec::with_capacity(episode_uuids.len());
2627        for value in results.into_iter().flatten().flatten() {
2628            if let Ok((episode, _)) = bincode::serde::decode_from_slice::<EpisodicNode, _>(
2629                &value,
2630                bincode::config::standard(),
2631            ) {
2632                episodes.push(episode);
2633            }
2634        }
2635
2636        tracing::debug!("get_episodes_by_entity: found {} episodes", episodes.len());
2637        Ok(episodes)
2638    }
2639
2640    /// Traverse graph starting from an entity (breadth-first)
2641    ///
2642    /// Implements Hebbian learning: edges traversed during retrieval are strengthened.
2643    /// This means frequently accessed pathways become stronger over time.
2644    ///
2645    /// Returns `TraversedEntity` with hop distance and decay factor for proper scoring:
2646    /// - hop 0 (start entity): decay = 1.0
2647    /// - hop 1: decay = 0.7
2648    /// - hop 2: decay = 0.49
2649    /// - etc.
2650    ///
2651    /// Performance: Uses batch edge reading and limits to handle large graphs.
2652    pub fn traverse_from_entity(
2653        &self,
2654        start_uuid: &Uuid,
2655        max_depth: usize,
2656    ) -> Result<GraphTraversal> {
2657        self.traverse_from_entity_filtered(start_uuid, max_depth, None)
2658    }
2659
2660    /// BFS graph traversal with optional minimum edge strength filter.
2661    ///
2662    /// When `min_strength` is Some, edges below the threshold are skipped
2663    /// during traversal and NOT Hebbianly strengthened (prevents ghost edge revival).
2664    pub fn traverse_from_entity_filtered(
2665        &self,
2666        start_uuid: &Uuid,
2667        max_depth: usize,
2668        min_strength: Option<f32>,
2669    ) -> Result<GraphTraversal> {
2670        // Performance limits
2671        const MAX_ENTITIES: usize = 200;
2672        const MAX_EDGES_PER_NODE: usize = 100;
2673
2674        // Use tuned decay from constants (0.15 max decay → ~86% retention per hop)
2675        // This enables deeper traversal than the old 0.7 factor
2676        use crate::constants::IMPORTANCE_DECAY_MAX;
2677        let hop_decay_factor: f32 = (-IMPORTANCE_DECAY_MAX).exp(); // e^(-0.15) ≈ 0.86
2678
2679        let mut visited_entities = HashSet::new();
2680        let mut visited_edges = HashSet::new();
2681        let mut current_level: Vec<(Uuid, usize)> = vec![(*start_uuid, 0)]; // (uuid, hop_distance)
2682        let mut all_entities: Vec<TraversedEntity> = Vec::new();
2683        let mut all_edges = Vec::new();
2684        let mut edges_to_strengthen = Vec::new();
2685
2686        visited_entities.insert(*start_uuid);
2687        if let Some(entity) = self.get_entity(start_uuid)? {
2688            all_entities.push(TraversedEntity {
2689                entity,
2690                hop_distance: 0,
2691                decay_factor: 1.0,
2692            });
2693        }
2694
2695        for depth in 0..max_depth {
2696            // Early termination if we have enough entities
2697            if all_entities.len() >= MAX_ENTITIES {
2698                break;
2699            }
2700
2701            let mut next_level = Vec::new();
2702
2703            for (entity_uuid, _hop) in &current_level {
2704                // Use limited edge reading
2705                let edges =
2706                    self.get_entity_relationships_limited(entity_uuid, Some(MAX_EDGES_PER_NODE))?;
2707
2708                for edge in edges {
2709                    if visited_edges.contains(&edge.uuid) {
2710                        continue;
2711                    }
2712
2713                    visited_edges.insert(edge.uuid);
2714
2715                    // Only traverse non-invalidated edges
2716                    if edge.invalidated_at.is_some() {
2717                        continue;
2718                    }
2719
2720                    // Compute effective strength (lazy decay calculation)
2721                    let effective = edge.effective_strength();
2722
2723                    // Skip weak edges if min_strength filter is set
2724                    if let Some(threshold) = min_strength {
2725                        if effective < threshold {
2726                            continue;
2727                        }
2728                    }
2729
2730                    // Collect edge UUID for Hebbian strengthening (only for traversed edges)
2731                    edges_to_strengthen.push(edge.uuid);
2732
2733                    // Return edge with effective strength
2734                    let mut edge_with_decay = edge.clone();
2735                    edge_with_decay.strength = effective;
2736                    all_edges.push(edge_with_decay);
2737
2738                    // Add connected entity
2739                    let connected_uuid = if edge.from_entity == *entity_uuid {
2740                        edge.to_entity
2741                    } else {
2742                        edge.from_entity
2743                    };
2744
2745                    if !visited_entities.contains(&connected_uuid) {
2746                        visited_entities.insert(connected_uuid);
2747                        let next_hop = depth + 1;
2748                        let decay = hop_decay_factor.powi(next_hop as i32);
2749
2750                        if let Some(entity) = self.get_entity(&connected_uuid)? {
2751                            all_entities.push(TraversedEntity {
2752                                entity,
2753                                hop_distance: next_hop,
2754                                decay_factor: decay,
2755                            });
2756                        }
2757                        next_level.push((connected_uuid, next_hop));
2758                    }
2759                }
2760            }
2761
2762            if next_level.is_empty() {
2763                break;
2764            }
2765
2766            current_level = next_level;
2767        }
2768
2769        // Apply Hebbian strengthening to all traversed edges atomically (SHO-65)
2770        // "Neurons that fire together, wire together"
2771        // Uses batch update for efficiency instead of individual writes
2772        if !edges_to_strengthen.is_empty() {
2773            match self.batch_strengthen_synapses(&edges_to_strengthen) {
2774                Ok(count) => {
2775                    if count > 0 {
2776                        tracing::trace!("Strengthened {} synapses during traversal", count);
2777                    }
2778                }
2779                Err(e) => {
2780                    tracing::debug!("Failed to batch strengthen synapses: {}", e);
2781                }
2782            }
2783        }
2784
2785        Ok(GraphTraversal {
2786            entities: all_entities,
2787            relationships: all_edges,
2788        })
2789    }
2790
2791    /// Weighted graph traversal with filtering (Dijkstra-style best-first)
2792    ///
2793    /// Unlike BFS traverse_from_entity, this uses edge weights to prioritize
2794    /// stronger connections. Enables Cypher-like pattern matching:
2795    /// - Filter by relationship types
2796    /// - Filter by minimum edge strength
2797    /// - Score paths by cumulative weight
2798    ///
2799    /// Returns entities sorted by path score (strongest connections first).
2800    ///
2801    /// Performance: Uses batch edge reading and early termination to handle
2802    /// large graphs (10000+ edges) efficiently.
2803    pub fn traverse_weighted(
2804        &self,
2805        start_uuid: &Uuid,
2806        max_depth: usize,
2807        relation_types: Option<&[RelationType]>,
2808        min_strength: f32,
2809    ) -> Result<GraphTraversal> {
2810        // Performance limits - prevents exponential blowup on dense graphs
2811        const MAX_ENTITIES: usize = 200; // Stop after finding this many entities
2812        const MAX_EDGES_PER_NODE: usize = 100; // Limit edges loaded per node
2813        const MAX_ITERATIONS: usize = 500; // Prevent infinite loops
2814
2815        // Priority queue entry: (negative score for max-heap, uuid, depth, path_score)
2816        #[derive(Clone)]
2817        struct PQEntry {
2818            score: f32,
2819            uuid: Uuid,
2820            depth: usize,
2821        }
2822        impl PartialEq for PQEntry {
2823            fn eq(&self, other: &Self) -> bool {
2824                self.score == other.score
2825            }
2826        }
2827        impl Eq for PQEntry {}
2828        impl PartialOrd for PQEntry {
2829            fn partial_cmp(&self, other: &Self) -> Option<CmpOrdering> {
2830                Some(self.cmp(other))
2831            }
2832        }
2833        impl Ord for PQEntry {
2834            fn cmp(&self, other: &Self) -> CmpOrdering {
2835                // BinaryHeap is a max-heap: higher score = popped first = explored first
2836                self.score.total_cmp(&other.score)
2837            }
2838        }
2839
2840        let mut visited: HashMap<Uuid, f32> = HashMap::new(); // uuid -> best path score
2841        let mut heap: BinaryHeap<PQEntry> = BinaryHeap::new();
2842        let mut all_entities: Vec<TraversedEntity> = Vec::new();
2843        let mut all_edges: Vec<RelationshipEdge> = Vec::new();
2844        let mut edges_to_strengthen: Vec<Uuid> = Vec::new();
2845        let mut iterations = 0;
2846
2847        // Start node
2848        heap.push(PQEntry {
2849            score: 1.0,
2850            uuid: *start_uuid,
2851            depth: 0,
2852        });
2853        visited.insert(*start_uuid, 1.0);
2854
2855        if let Some(entity) = self.get_entity(start_uuid)? {
2856            all_entities.push(TraversedEntity {
2857                entity,
2858                hop_distance: 0,
2859                decay_factor: 1.0,
2860            });
2861        }
2862
2863        while let Some(PQEntry { score, uuid, depth }) = heap.pop() {
2864            iterations += 1;
2865
2866            // Early termination: entity/iteration limits reached, stop draining heap
2867            if all_entities.len() >= MAX_ENTITIES || iterations >= MAX_ITERATIONS {
2868                break;
2869            }
2870
2871            // Depth limit: skip this node's children but keep processing others
2872            if depth >= max_depth {
2873                continue;
2874            }
2875
2876            // Skip if we've found a better path to this node
2877            if let Some(&best) = visited.get(&uuid) {
2878                if score < best * 0.99 {
2879                    continue;
2880                }
2881            }
2882
2883            // Use limited edge reading to avoid loading 10000+ edges
2884            let edges = self.get_entity_relationships_limited(&uuid, Some(MAX_EDGES_PER_NODE))?;
2885
2886            for edge in edges {
2887                // Skip invalidated edges
2888                if edge.invalidated_at.is_some() {
2889                    continue;
2890                }
2891
2892                // Filter by relationship type if specified
2893                if let Some(types) = relation_types {
2894                    if !types.contains(&edge.relation_type) {
2895                        continue;
2896                    }
2897                }
2898
2899                // Filter by minimum strength
2900                let effective = edge.effective_strength();
2901                if effective < min_strength {
2902                    continue;
2903                }
2904
2905                let connected_uuid = if edge.from_entity == uuid {
2906                    edge.to_entity
2907                } else {
2908                    edge.from_entity
2909                };
2910
2911                // Path score = parent_score * edge_strength (multiplicative decay)
2912                let new_score = score * effective;
2913
2914                // Only visit if this is a better path
2915                let dominated = visited
2916                    .get(&connected_uuid)
2917                    .is_some_and(|&best| new_score <= best);
2918                if dominated {
2919                    continue;
2920                }
2921
2922                // Track edge for Hebbian strengthening AFTER domination check
2923                // so only edges on optimal paths are strengthened
2924                edges_to_strengthen.push(edge.uuid);
2925
2926                let is_new_entity = !visited.contains_key(&connected_uuid);
2927                visited.insert(connected_uuid, new_score);
2928
2929                // Add edge to results
2930                let mut edge_with_strength = edge.clone();
2931                edge_with_strength.strength = effective;
2932                all_edges.push(edge_with_strength);
2933
2934                // Only add entity once (first discovery); better-path rediscovery
2935                // updates visited score but doesn't duplicate the entity in results
2936                if is_new_entity {
2937                    if let Some(entity) = self.get_entity(&connected_uuid)? {
2938                        all_entities.push(TraversedEntity {
2939                            entity,
2940                            hop_distance: depth + 1,
2941                            decay_factor: new_score,
2942                        });
2943                    }
2944                }
2945
2946                heap.push(PQEntry {
2947                    score: new_score,
2948                    uuid: connected_uuid,
2949                    depth: depth + 1,
2950                });
2951            }
2952        }
2953
2954        // Sort entities by path score (decay_factor) descending
2955        all_entities.sort_by(|a, b| b.decay_factor.total_cmp(&a.decay_factor));
2956
2957        // Hebbian strengthening
2958        if !edges_to_strengthen.is_empty() {
2959            if let Err(e) = self.batch_strengthen_synapses(&edges_to_strengthen) {
2960                tracing::debug!("Failed to strengthen synapses: {}", e);
2961            }
2962        }
2963
2964        tracing::debug!(
2965            "traverse_weighted: {} entities, {} edges (min_strength={:.2})",
2966            all_entities.len(),
2967            all_edges.len(),
2968            min_strength
2969        );
2970
2971        Ok(GraphTraversal {
2972            entities: all_entities,
2973            relationships: all_edges,
2974        })
2975    }
2976
2977    /// Bidirectional search between two entities (meet-in-middle)
2978    ///
2979    /// Complexity: O(b^(d/2)) instead of O(b^d) for unidirectional search.
2980    /// Finds the shortest weighted path between start and goal.
2981    /// Returns entities along the path with their path scores.
2982    ///
2983    /// Performance: Uses batch edge reading with limits.
2984    pub fn traverse_bidirectional(
2985        &self,
2986        start_uuid: &Uuid,
2987        goal_uuid: &Uuid,
2988        max_depth: usize,
2989        min_strength: f32,
2990    ) -> Result<GraphTraversal> {
2991        const MAX_EDGES_PER_NODE: usize = 100;
2992
2993        // Track forward search from start
2994        let mut forward_visited: HashMap<Uuid, (f32, usize)> = HashMap::new(); // uuid -> (score, depth)
2995        let mut forward_parents: HashMap<Uuid, (Uuid, Uuid)> = HashMap::new(); // child -> (parent, edge_uuid)
2996        let mut forward_frontier: Vec<(Uuid, f32, usize)> = vec![(*start_uuid, 1.0, 0)];
2997        forward_visited.insert(*start_uuid, (1.0, 0));
2998
2999        // Track backward search from goal
3000        let mut backward_visited: HashMap<Uuid, (f32, usize)> = HashMap::new();
3001        let mut backward_parents: HashMap<Uuid, (Uuid, Uuid)> = HashMap::new();
3002        let mut backward_frontier: Vec<(Uuid, f32, usize)> = vec![(*goal_uuid, 1.0, 0)];
3003        backward_visited.insert(*goal_uuid, (1.0, 0));
3004
3005        let mut meeting_node: Option<Uuid> = None;
3006        let mut best_path_score: f32 = 0.0;
3007        let half_depth = max_depth / 2 + 1;
3008
3009        // Alternate forward and backward expansion
3010        for _round in 0..half_depth {
3011            // Expand forward frontier
3012            let mut new_forward: Vec<(Uuid, f32, usize)> = Vec::new();
3013            for (uuid, score, depth) in forward_frontier.drain(..) {
3014                if depth >= half_depth {
3015                    continue;
3016                }
3017
3018                let edges =
3019                    self.get_entity_relationships_limited(&uuid, Some(MAX_EDGES_PER_NODE))?;
3020                for edge in edges {
3021                    if edge.invalidated_at.is_some() {
3022                        continue;
3023                    }
3024                    let effective = edge.effective_strength();
3025                    if effective < min_strength {
3026                        continue;
3027                    }
3028
3029                    let connected = if edge.from_entity == uuid {
3030                        edge.to_entity
3031                    } else {
3032                        edge.from_entity
3033                    };
3034                    let new_score = score * effective;
3035
3036                    // Check if we meet backward search
3037                    if let Some(&(back_score, _)) = backward_visited.get(&connected) {
3038                        let combined = new_score * back_score;
3039                        if combined > best_path_score {
3040                            best_path_score = combined;
3041                            meeting_node = Some(connected);
3042                        }
3043                    }
3044
3045                    // Update forward frontier
3046                    let dominated = forward_visited
3047                        .get(&connected)
3048                        .is_some_and(|&(best, _)| new_score <= best);
3049                    if !dominated {
3050                        forward_visited.insert(connected, (new_score, depth + 1));
3051                        forward_parents.insert(connected, (uuid, edge.uuid));
3052                        new_forward.push((connected, new_score, depth + 1));
3053                    }
3054                }
3055            }
3056            forward_frontier = new_forward;
3057
3058            // Expand backward frontier
3059            let mut new_backward: Vec<(Uuid, f32, usize)> = Vec::new();
3060            for (uuid, score, depth) in backward_frontier.drain(..) {
3061                if depth >= half_depth {
3062                    continue;
3063                }
3064
3065                let edges =
3066                    self.get_entity_relationships_limited(&uuid, Some(MAX_EDGES_PER_NODE))?;
3067                for edge in edges {
3068                    if edge.invalidated_at.is_some() {
3069                        continue;
3070                    }
3071                    let effective = edge.effective_strength();
3072                    if effective < min_strength {
3073                        continue;
3074                    }
3075
3076                    let connected = if edge.from_entity == uuid {
3077                        edge.to_entity
3078                    } else {
3079                        edge.from_entity
3080                    };
3081                    let new_score = score * effective;
3082
3083                    // Check if we meet forward search
3084                    if let Some(&(fwd_score, _)) = forward_visited.get(&connected) {
3085                        let combined = fwd_score * new_score;
3086                        if combined > best_path_score {
3087                            best_path_score = combined;
3088                            meeting_node = Some(connected);
3089                        }
3090                    }
3091
3092                    // Update backward frontier
3093                    let dominated = backward_visited
3094                        .get(&connected)
3095                        .is_some_and(|&(best, _)| new_score <= best);
3096                    if !dominated {
3097                        backward_visited.insert(connected, (new_score, depth + 1));
3098                        backward_parents.insert(connected, (uuid, edge.uuid));
3099                        new_backward.push((connected, new_score, depth + 1));
3100                    }
3101                }
3102            }
3103            backward_frontier = new_backward;
3104
3105            // Early termination if we found a meeting point
3106            if meeting_node.is_some() {
3107                break;
3108            }
3109        }
3110
3111        // Reconstruct path from meeting node
3112        let mut all_entities: Vec<TraversedEntity> = Vec::new();
3113        let mut all_edges: Vec<RelationshipEdge> = Vec::new();
3114        let mut edges_to_strengthen: Vec<Uuid> = Vec::new();
3115
3116        if let Some(meeting) = meeting_node {
3117            // Forward path: start -> meeting
3118            let mut path_forward: Vec<Uuid> = vec![meeting];
3119            let mut current = meeting;
3120            while let Some(&(parent, edge_uuid)) = forward_parents.get(&current) {
3121                path_forward.push(parent);
3122                edges_to_strengthen.push(edge_uuid);
3123                if let Some(edge) = self.get_relationship(&edge_uuid)? {
3124                    all_edges.push(edge);
3125                }
3126                current = parent;
3127            }
3128            path_forward.reverse();
3129
3130            // Backward path: meeting -> goal
3131            let mut path_backward: Vec<Uuid> = Vec::new();
3132            current = meeting;
3133            while let Some(&(parent, edge_uuid)) = backward_parents.get(&current) {
3134                path_backward.push(parent);
3135                edges_to_strengthen.push(edge_uuid);
3136                if let Some(edge) = self.get_relationship(&edge_uuid)? {
3137                    all_edges.push(edge);
3138                }
3139                current = parent;
3140            }
3141
3142            // Combine paths
3143            let full_path: Vec<Uuid> = path_forward.into_iter().chain(path_backward).collect();
3144
3145            // Build entities with scores
3146            for (i, uuid) in full_path.iter().enumerate() {
3147                if let Some(entity) = self.get_entity(uuid)? {
3148                    let score = forward_visited
3149                        .get(uuid)
3150                        .map(|&(s, _)| s)
3151                        .or_else(|| backward_visited.get(uuid).map(|&(s, _)| s))
3152                        .unwrap_or(0.5);
3153                    all_entities.push(TraversedEntity {
3154                        entity,
3155                        hop_distance: i,
3156                        decay_factor: score,
3157                    });
3158                }
3159            }
3160        } else {
3161            // No path found - return empty traversal
3162            tracing::debug!(
3163                "traverse_bidirectional: no path between {:?} and {:?}",
3164                start_uuid,
3165                goal_uuid
3166            );
3167        }
3168
3169        // Hebbian strengthening for traversed edges
3170        if !edges_to_strengthen.is_empty() {
3171            if let Err(e) = self.batch_strengthen_synapses(&edges_to_strengthen) {
3172                tracing::debug!("Failed to strengthen synapses: {}", e);
3173            }
3174        }
3175
3176        tracing::debug!(
3177            "traverse_bidirectional: {} entities, {} edges, path_score={:.4}",
3178            all_entities.len(),
3179            all_edges.len(),
3180            best_path_score
3181        );
3182
3183        Ok(GraphTraversal {
3184            entities: all_entities,
3185            relationships: all_edges,
3186        })
3187    }
3188
3189    /// Subgraph pattern matching (Cypher-like MATCH patterns)
3190    ///
3191    /// Pattern format: Vec of (relation_type, direction) tuples
3192    /// Direction: true = outgoing (a->b), false = incoming (a<-b)
3193    ///
3194    /// Example: MATCH (a)-[:WORKS_AT]->(b)-[:LOCATED_IN]->(c)
3195    /// Pattern: vec![(WorksAt, true), (LocatedIn, true)]
3196    ///
3197    /// Returns all entities that match the pattern starting from start_uuid.
3198    pub fn match_pattern(
3199        &self,
3200        start_uuid: &Uuid,
3201        pattern: &[(RelationType, bool)], // (relation_type, is_outgoing)
3202        min_strength: f32,
3203    ) -> Result<Vec<Vec<TraversedEntity>>> {
3204        let mut matches: Vec<Vec<TraversedEntity>> = Vec::new();
3205
3206        // Start entity
3207        let start_entity = match self.get_entity(start_uuid)? {
3208            Some(e) => e,
3209            None => return Ok(matches),
3210        };
3211
3212        // DFS backtracking search
3213        let mut path: Vec<TraversedEntity> = vec![TraversedEntity {
3214            entity: start_entity,
3215            hop_distance: 0,
3216            decay_factor: 1.0,
3217        }];
3218
3219        self.match_pattern_recursive(
3220            *start_uuid,
3221            pattern,
3222            0,
3223            min_strength,
3224            1.0,
3225            &mut path,
3226            &mut matches,
3227        )?;
3228
3229        tracing::debug!(
3230            "match_pattern: found {} matches for {}-step pattern",
3231            matches.len(),
3232            pattern.len()
3233        );
3234
3235        Ok(matches)
3236    }
3237
3238    #[allow(clippy::too_many_arguments)]
3239    fn match_pattern_recursive(
3240        &self,
3241        current_uuid: Uuid,
3242        pattern: &[(RelationType, bool)],
3243        pattern_idx: usize,
3244        min_strength: f32,
3245        path_score: f32,
3246        path: &mut Vec<TraversedEntity>,
3247        matches: &mut Vec<Vec<TraversedEntity>>,
3248    ) -> Result<()> {
3249        // Base case: completed the pattern
3250        if pattern_idx >= pattern.len() {
3251            matches.push(path.clone());
3252            return Ok(());
3253        }
3254
3255        const MAX_EDGES_PER_NODE: usize = 100;
3256        let (required_type, is_outgoing) = &pattern[pattern_idx];
3257        let edges =
3258            self.get_entity_relationships_limited(&current_uuid, Some(MAX_EDGES_PER_NODE))?;
3259
3260        for edge in edges {
3261            if edge.invalidated_at.is_some() {
3262                continue;
3263            }
3264
3265            // Check relationship type
3266            if edge.relation_type != *required_type {
3267                continue;
3268            }
3269
3270            // Check direction
3271            let (next_uuid, direction_matches) = if *is_outgoing {
3272                // Looking for current -> next
3273                if edge.from_entity == current_uuid {
3274                    (edge.to_entity, true)
3275                } else {
3276                    (edge.from_entity, false) // Wrong direction
3277                }
3278            } else {
3279                // Looking for current <- next (incoming)
3280                if edge.to_entity == current_uuid {
3281                    (edge.from_entity, true)
3282                } else {
3283                    (edge.to_entity, false) // Wrong direction
3284                }
3285            };
3286
3287            if !direction_matches {
3288                continue;
3289            }
3290
3291            // Check strength
3292            let effective = edge.effective_strength();
3293            if effective < min_strength {
3294                continue;
3295            }
3296
3297            // Avoid cycles in pattern
3298            if path.iter().any(|te| te.entity.uuid == next_uuid) {
3299                continue;
3300            }
3301
3302            // Add to path and recurse
3303            if let Some(entity) = self.get_entity(&next_uuid)? {
3304                let new_score = path_score * effective;
3305                path.push(TraversedEntity {
3306                    entity,
3307                    hop_distance: pattern_idx + 1,
3308                    decay_factor: new_score,
3309                });
3310
3311                self.match_pattern_recursive(
3312                    next_uuid,
3313                    pattern,
3314                    pattern_idx + 1,
3315                    min_strength,
3316                    new_score,
3317                    path,
3318                    matches,
3319                )?;
3320
3321                path.pop();
3322            }
3323        }
3324
3325        Ok(())
3326    }
3327
3328    /// Find entities matching a pattern from any starting point
3329    ///
3330    /// Scans all entities and finds those that match the given pattern.
3331    /// More expensive than match_pattern but doesn't require a known start.
3332    ///
3333    /// Pattern: Vec of (relation_type, is_outgoing) tuples
3334    /// Returns: All complete pattern matches with their paths.
3335    pub fn find_pattern_matches(
3336        &self,
3337        pattern: &[(RelationType, bool)],
3338        min_strength: f32,
3339        limit: usize,
3340    ) -> Result<Vec<Vec<TraversedEntity>>> {
3341        let mut all_matches: Vec<Vec<TraversedEntity>> = Vec::new();
3342
3343        // Iterate through all entities as potential starting points
3344        let iter = self
3345            .db
3346            .iterator_cf(self.entities_cf(), rocksdb::IteratorMode::Start);
3347        for result in iter {
3348            if all_matches.len() >= limit {
3349                break;
3350            }
3351
3352            let (_, value) = result?;
3353            let (entity, _): (EntityNode, _) =
3354                bincode::serde::decode_from_slice(&value, bincode::config::standard())?;
3355
3356            let entity_matches = self.match_pattern(&entity.uuid, pattern, min_strength)?;
3357            for m in entity_matches {
3358                if all_matches.len() >= limit {
3359                    break;
3360                }
3361                all_matches.push(m);
3362            }
3363        }
3364
3365        tracing::debug!(
3366            "find_pattern_matches: {} total matches (limit={})",
3367            all_matches.len(),
3368            limit
3369        );
3370
3371        Ok(all_matches)
3372    }
3373
3374    /// Invalidate a relationship (temporal edge invalidation)
3375    ///
3376    /// Guarded by synapse_update_lock to prevent race with strengthen/decay.
3377    pub fn invalidate_relationship(&self, edge_uuid: &Uuid) -> Result<()> {
3378        let _guard = self
3379            .synapse_update_lock
3380            .try_lock_for(std::time::Duration::from_secs(5))
3381            .ok_or_else(|| {
3382                anyhow::anyhow!("synapse_update_lock timeout in invalidate_relationship")
3383            })?;
3384
3385        if let Some(mut edge) = self.get_relationship(edge_uuid)? {
3386            edge.invalidated_at = Some(Utc::now());
3387
3388            let key = edge.uuid.as_bytes();
3389            let value = bincode::serde::encode_to_vec(&edge, bincode::config::standard())?;
3390            self.db.put_cf(self.relationships_cf(), key, value)?;
3391        }
3392
3393        Ok(())
3394    }
3395
3396    /// Strengthen a synapse (Hebbian learning)
3397    ///
3398    /// Called when an edge is traversed during memory retrieval.
3399    /// Implements "neurons that fire together, wire together".
3400    ///
3401    /// Uses a mutex to prevent race conditions during concurrent updates (SHO-64).
3402    pub fn strengthen_synapse(&self, edge_uuid: &Uuid) -> Result<()> {
3403        // Lock with timeout to prevent deadlock on panic
3404        let _guard = self
3405            .synapse_update_lock
3406            .try_lock_for(std::time::Duration::from_secs(5))
3407            .ok_or_else(|| anyhow::anyhow!("synapse_update_lock timeout in strengthen_synapse"))?;
3408
3409        if let Some(mut edge) = self.get_relationship(edge_uuid)? {
3410            let _ = edge.strengthen();
3411
3412            let key = edge.uuid.as_bytes();
3413            let value = bincode::serde::encode_to_vec(&edge, bincode::config::standard())?;
3414            self.db.put_cf(self.relationships_cf(), key, value)?;
3415        }
3416
3417        Ok(())
3418    }
3419
3420    /// Batch strengthen multiple synapses atomically (SHO-65)
3421    ///
3422    /// More efficient than calling strengthen_synapse individually for each edge.
3423    /// Uses RocksDB WriteBatch for atomic multi-write and a single lock acquisition.
3424    ///
3425    /// Returns the number of synapses successfully strengthened.
3426    pub fn batch_strengthen_synapses(&self, edge_uuids: &[Uuid]) -> Result<usize> {
3427        if edge_uuids.is_empty() {
3428            return Ok(0);
3429        }
3430
3431        // Single lock acquisition for entire batch, with timeout
3432        let _guard = self
3433            .synapse_update_lock
3434            .try_lock_for(std::time::Duration::from_secs(5))
3435            .ok_or_else(|| {
3436                anyhow::anyhow!("synapse_update_lock timeout in batch_strengthen_synapses")
3437            })?;
3438
3439        // Batch read all edges in a single RocksDB call (same pattern as get_entity_relationships_limited)
3440        let keys: Vec<[u8; 16]> = edge_uuids.iter().map(|u| *u.as_bytes()).collect();
3441        let key_refs: Vec<&[u8]> = keys.iter().map(|k| k.as_slice()).collect();
3442        let results = self
3443            .db
3444            .batched_multi_get_cf(self.relationships_cf(), &key_refs, false);
3445
3446        let mut batch = WriteBatch::default();
3447        let mut strengthened = 0;
3448
3449        for (i, result) in results.into_iter().enumerate() {
3450            if let Ok(Some(value)) = result {
3451                if let Ok((mut edge, _)) = bincode::serde::decode_from_slice::<RelationshipEdge, _>(
3452                    &value,
3453                    bincode::config::standard(),
3454                ) {
3455                    let _ = edge.strengthen();
3456                    match bincode::serde::encode_to_vec(&edge, bincode::config::standard()) {
3457                        Ok(encoded) => {
3458                            batch.put_cf(self.relationships_cf(), &keys[i], encoded);
3459                            strengthened += 1;
3460                        }
3461                        Err(e) => {
3462                            tracing::debug!("Failed to serialize edge {}: {}", edge_uuids[i], e);
3463                        }
3464                    }
3465                }
3466            }
3467        }
3468
3469        // Atomic write of all updates
3470        if strengthened > 0 {
3471            self.db.write(batch)?;
3472        }
3473
3474        Ok(strengthened)
3475    }
3476
3477    /// Record co-retrieval of memories (Hebbian learning between memories)
3478    ///
3479    /// When memories are retrieved together, they form associations.
3480    /// This creates or strengthens CoRetrieved edges between all pairs of memories.
3481    ///
3482    /// Note: Limits to top N memories to avoid O(n²) explosion on large retrievals.
3483    /// Returns the number of edges created/strengthened.
3484    pub fn record_memory_coactivation(&self, memory_ids: &[Uuid]) -> Result<usize> {
3485        const MAX_COACTIVATION_SIZE: usize = 20;
3486
3487        // Limit to top N to bound worst-case complexity
3488        let memories_to_process = if memory_ids.len() > MAX_COACTIVATION_SIZE {
3489            &memory_ids[..MAX_COACTIVATION_SIZE]
3490        } else {
3491            memory_ids
3492        };
3493
3494        if memories_to_process.len() < 2 {
3495            return Ok(0);
3496        }
3497
3498        let _guard = self
3499            .synapse_update_lock
3500            .try_lock_for(std::time::Duration::from_secs(5))
3501            .ok_or_else(|| {
3502                anyhow::anyhow!("synapse_update_lock timeout in record_memory_coactivation")
3503            })?;
3504        let mut batch = WriteBatch::default();
3505        let mut edges_updated = 0;
3506        let mut new_edges = 0;
3507
3508        // Process all pairs
3509        for i in 0..memories_to_process.len() {
3510            for j in (i + 1)..memories_to_process.len() {
3511                let mem_a = memories_to_process[i];
3512                let mem_b = memories_to_process[j];
3513
3514                // Try to find existing edge between these memories
3515                let existing_edge = self.find_edge_between_entities(&mem_a, &mem_b)?;
3516
3517                if let Some(mut edge) = existing_edge {
3518                    // Strengthen existing edge
3519                    let _ = edge.strengthen();
3520                    let key = edge.uuid.as_bytes();
3521                    if let Ok(value) =
3522                        bincode::serde::encode_to_vec(&edge, bincode::config::standard())
3523                    {
3524                        batch.put_cf(self.relationships_cf(), key, value);
3525                        edges_updated += 1;
3526                    }
3527                } else {
3528                    // Create new CoRetrieved edge (bidirectional represented as single edge)
3529                    // Starts in L1 (working memory) with tier-specific initial weight
3530                    let edge = RelationshipEdge {
3531                        uuid: Uuid::new_v4(),
3532                        from_entity: mem_a,
3533                        to_entity: mem_b,
3534                        relation_type: RelationType::CoRetrieved,
3535                        strength: EdgeTier::L1Working.initial_weight(),
3536                        created_at: Utc::now(),
3537                        valid_at: Utc::now(),
3538                        invalidated_at: None,
3539                        source_episode_id: None,
3540                        context: String::new(),
3541                        last_activated: Utc::now(),
3542                        activation_count: 1,
3543                        ltp_status: LtpStatus::None,
3544                        activation_timestamps: None,
3545                        tier: EdgeTier::L1Working,
3546                        // PIPE-5: Memory-to-memory edges use default confidence
3547                        entity_confidence: None,
3548                    };
3549
3550                    let key = edge.uuid.as_bytes();
3551                    if let Ok(value) =
3552                        bincode::serde::encode_to_vec(&edge, bincode::config::standard())
3553                    {
3554                        batch.put_cf(self.relationships_cf(), key, value);
3555
3556                        // Also index in the reverse direction for lookup
3557                        let idx_key_fwd = format!("mem_edge:{mem_a}:{mem_b}");
3558                        let idx_key_rev = format!("mem_edge:{mem_b}:{mem_a}");
3559                        batch.put_cf(
3560                            self.relationships_cf(),
3561                            idx_key_fwd.as_bytes(),
3562                            edge.uuid.as_bytes(),
3563                        );
3564                        batch.put_cf(
3565                            self.relationships_cf(),
3566                            idx_key_rev.as_bytes(),
3567                            edge.uuid.as_bytes(),
3568                        );
3569
3570                        edges_updated += 1;
3571                        new_edges += 1;
3572                    }
3573                }
3574            }
3575        }
3576
3577        if edges_updated > 0 {
3578            self.db.write(batch)?;
3579            // Update relationship counter for newly created edges
3580            if new_edges > 0 {
3581                self.relationship_count
3582                    .fetch_add(new_edges, Ordering::Relaxed);
3583            }
3584        }
3585
3586        Ok(edges_updated)
3587    }
3588
3589    /// Find an edge between two entities/memories (in either direction)
3590    fn find_edge_between_entities(
3591        &self,
3592        entity_a: &Uuid,
3593        entity_b: &Uuid,
3594    ) -> Result<Option<RelationshipEdge>> {
3595        // Check forward index
3596        let idx_key = format!("mem_edge:{entity_a}:{entity_b}");
3597        if let Some(edge_uuid_bytes) = self
3598            .db
3599            .get_cf(self.relationships_cf(), idx_key.as_bytes())?
3600        {
3601            if edge_uuid_bytes.len() == 16 {
3602                let edge_uuid = Uuid::from_slice(&edge_uuid_bytes)?;
3603                return self.get_relationship(&edge_uuid);
3604            }
3605        }
3606
3607        // Check reverse index
3608        let idx_key_rev = format!("mem_edge:{entity_b}:{entity_a}");
3609        if let Some(edge_uuid_bytes) = self
3610            .db
3611            .get_cf(self.relationships_cf(), idx_key_rev.as_bytes())?
3612        {
3613            if edge_uuid_bytes.len() == 16 {
3614                let edge_uuid = Uuid::from_slice(&edge_uuid_bytes)?;
3615                return self.get_relationship(&edge_uuid);
3616            }
3617        }
3618
3619        Ok(None)
3620    }
3621
3622    /// Batch strengthen edges between memory pairs from replay consolidation
3623    ///
3624    /// Takes edge boosts from memory replay and applies Hebbian strengthening.
3625    /// Creates edges if they don't exist, strengthens if they do.
3626    ///
3627    /// Returns (count_strengthened, promotion_boosts) where promotion_boosts contains
3628    /// signals for any edge tier promotions that occurred (Direction 1 coupling).
3629    pub fn strengthen_memory_edges(
3630        &self,
3631        edge_boosts: &[(String, String, f32)],
3632    ) -> Result<(usize, Vec<crate::memory::types::EdgePromotionBoost>)> {
3633        use crate::constants::{EDGE_PROMOTION_MEMORY_BOOST_L2, EDGE_PROMOTION_MEMORY_BOOST_L3};
3634
3635        if edge_boosts.is_empty() {
3636            return Ok((0, Vec::new()));
3637        }
3638
3639        let _guard = self
3640            .synapse_update_lock
3641            .try_lock_for(std::time::Duration::from_secs(5))
3642            .ok_or_else(|| {
3643                anyhow::anyhow!("synapse_update_lock timeout in strengthen_edges_from_boosts")
3644            })?;
3645        let mut batch = WriteBatch::default();
3646        let mut strengthened = 0;
3647        let mut promotion_boosts = Vec::new();
3648
3649        for (from_id_str, to_id_str, _boost) in edge_boosts {
3650            // Parse UUIDs
3651            let from_uuid = match Uuid::parse_str(from_id_str) {
3652                Ok(u) => u,
3653                Err(_) => {
3654                    tracing::debug!("Invalid from_id UUID: {}", from_id_str);
3655                    continue;
3656                }
3657            };
3658            let to_uuid = match Uuid::parse_str(to_id_str) {
3659                Ok(u) => u,
3660                Err(_) => {
3661                    tracing::debug!("Invalid to_id UUID: {}", to_id_str);
3662                    continue;
3663                }
3664            };
3665
3666            // Find or create edge
3667            let existing_edge = self.find_edge_between_entities(&from_uuid, &to_uuid)?;
3668
3669            if let Some(mut edge) = existing_edge {
3670                // Strengthen existing edge — capture tier promotion if it occurs
3671                let promotion = edge.strengthen();
3672                let key = edge.uuid.as_bytes();
3673                if let Ok(value) = bincode::serde::encode_to_vec(&edge, bincode::config::standard())
3674                {
3675                    batch.put_cf(self.relationships_cf(), key, value);
3676                    strengthened += 1;
3677
3678                    // If a tier promotion occurred, emit boost signals for both memories
3679                    if let Some((old_tier, new_tier)) = promotion {
3680                        let boost = if new_tier.contains("L2") {
3681                            EDGE_PROMOTION_MEMORY_BOOST_L2
3682                        } else {
3683                            EDGE_PROMOTION_MEMORY_BOOST_L3
3684                        };
3685                        let entity_name = format!(
3686                            "{}↔{}",
3687                            &from_id_str[..8.min(from_id_str.len())],
3688                            &to_id_str[..8.min(to_id_str.len())]
3689                        );
3690                        // Boost both memories involved in the promoted edge
3691                        promotion_boosts.push(crate::memory::types::EdgePromotionBoost {
3692                            memory_id: from_id_str.clone(),
3693                            entity_name: entity_name.clone(),
3694                            old_tier: old_tier.clone(),
3695                            new_tier: new_tier.clone(),
3696                            boost,
3697                        });
3698                        promotion_boosts.push(crate::memory::types::EdgePromotionBoost {
3699                            memory_id: to_id_str.clone(),
3700                            entity_name,
3701                            old_tier,
3702                            new_tier,
3703                            boost,
3704                        });
3705                    }
3706                }
3707            } else {
3708                // Create new ReplayStrengthened edge
3709                // Replay edges start in L2 (episodic) since they represent consolidated associations
3710                let edge = RelationshipEdge {
3711                    uuid: Uuid::new_v4(),
3712                    from_entity: from_uuid,
3713                    to_entity: to_uuid,
3714                    relation_type: RelationType::CoRetrieved,
3715                    strength: EdgeTier::L2Episodic.initial_weight(),
3716                    created_at: Utc::now(),
3717                    valid_at: Utc::now(),
3718                    invalidated_at: None,
3719                    source_episode_id: None,
3720                    context: "replay_strengthened".to_string(),
3721                    last_activated: Utc::now(),
3722                    activation_count: 1,
3723                    ltp_status: LtpStatus::None,
3724                    activation_timestamps: None,
3725                    tier: EdgeTier::L2Episodic,
3726                    // PIPE-5: Replay edges use default confidence
3727                    entity_confidence: None,
3728                };
3729
3730                let key = edge.uuid.as_bytes();
3731                if let Ok(value) = bincode::serde::encode_to_vec(&edge, bincode::config::standard())
3732                {
3733                    batch.put_cf(self.relationships_cf(), key, value);
3734
3735                    // Index both directions
3736                    let idx_key_fwd = format!("mem_edge:{from_uuid}:{to_uuid}");
3737                    let idx_key_rev = format!("mem_edge:{to_uuid}:{from_uuid}");
3738                    batch.put_cf(
3739                        self.relationships_cf(),
3740                        idx_key_fwd.as_bytes(),
3741                        edge.uuid.as_bytes(),
3742                    );
3743                    batch.put_cf(
3744                        self.relationships_cf(),
3745                        idx_key_rev.as_bytes(),
3746                        edge.uuid.as_bytes(),
3747                    );
3748
3749                    strengthened += 1;
3750                }
3751            }
3752        }
3753
3754        if strengthened > 0 {
3755            self.db.write(batch)?;
3756
3757            // Index new replay edges in entity_edges CF so they're visible to
3758            // traversal and degree-cap enforcement (GQ-11 fix)
3759            let mut entities_to_prune = Vec::new();
3760            for (from_id_str, to_id_str, _boost) in edge_boosts {
3761                let from_uuid = match Uuid::parse_str(from_id_str) {
3762                    Ok(u) => u,
3763                    Err(_) => continue,
3764                };
3765                let to_uuid = match Uuid::parse_str(to_id_str) {
3766                    Ok(u) => u,
3767                    Err(_) => continue,
3768                };
3769                // Only index edges that we actually wrote (find_edge_between_entities returns
3770                // the edge if it existed before, so new edges are the ones that didn't exist)
3771                if let Ok(Some(edge)) = self.find_edge_between_entities(&from_uuid, &to_uuid) {
3772                    if edge.context == "replay_strengthened" && edge.activation_count <= 1 {
3773                        if let Err(e) = self.index_entity_edge(&from_uuid, &edge.uuid) {
3774                            tracing::debug!("Failed to index replay edge for entity: {}", e);
3775                        }
3776                        if let Err(e) = self.index_entity_edge(&to_uuid, &edge.uuid) {
3777                            tracing::debug!("Failed to index replay edge for entity: {}", e);
3778                        }
3779                        entities_to_prune.push(from_uuid);
3780                        entities_to_prune.push(to_uuid);
3781                    }
3782                }
3783            }
3784
3785            // Enforce degree cap on affected entities
3786            for entity_uuid in &entities_to_prune {
3787                let _ = self.prune_entity_if_over_degree(entity_uuid);
3788            }
3789
3790            tracing::debug!(
3791                "Applied {} edge boosts from replay consolidation ({} tier promotions)",
3792                strengthened,
3793                promotion_boosts.len()
3794            );
3795        }
3796
3797        Ok((strengthened, promotion_boosts))
3798    }
3799
3800    /// Find memories associated with a given memory through co-retrieval
3801    ///
3802    /// Uses weighted graph traversal prioritizing stronger associations.
3803    /// Returns memory UUIDs sorted by association strength.
3804    pub fn find_memory_associations(
3805        &self,
3806        memory_id: &Uuid,
3807        max_results: usize,
3808    ) -> Result<Vec<(Uuid, f32)>> {
3809        let mut associations: Vec<(Uuid, f32)> = Vec::new();
3810
3811        // Scan for edges involving this memory
3812        let prefix_fwd = format!("mem_edge:{memory_id}:");
3813
3814        let iter = self
3815            .db
3816            .prefix_iterator_cf(self.relationships_cf(), prefix_fwd.as_bytes());
3817        for item in iter {
3818            let (key, value) = item?;
3819
3820            // Check if this is our prefix (RocksDB prefix_iterator may return extra)
3821            let key_str = String::from_utf8_lossy(&key);
3822            if !key_str.starts_with(&prefix_fwd) {
3823                break;
3824            }
3825
3826            // Get edge UUID from value and look up edge
3827            if value.len() == 16 {
3828                let edge_uuid = Uuid::from_slice(&value)?;
3829                if let Some(edge) = self.get_relationship(&edge_uuid)? {
3830                    // Get the other memory in this edge
3831                    let other_id = if edge.from_entity == *memory_id {
3832                        edge.to_entity
3833                    } else {
3834                        edge.from_entity
3835                    };
3836
3837                    // Get effective strength with decay
3838                    let effective_strength = edge.effective_strength();
3839                    if effective_strength > LTP_MIN_STRENGTH {
3840                        associations.push((other_id, effective_strength));
3841                    }
3842                }
3843            }
3844        }
3845
3846        // Sort by strength descending and limit
3847        associations.sort_by(|a, b| b.1.total_cmp(&a.1));
3848        associations.truncate(max_results);
3849
3850        Ok(associations)
3851    }
3852
3853    /// Strengthen entity-entity edges for a replayed memory's episode.
3854    ///
3855    /// During consolidation replay, this reinforces the entity relationships that
3856    /// were involved in the replayed memory. This is "Direction 3" of the Hebbian
3857    /// maintenance system — entity-entity edges get strengthened alongside
3858    /// memory-to-memory edges (Direction 1) and lazy pruning (Direction 2).
3859    ///
3860    /// Algorithm:
3861    /// 1. Look up EpisodicNode for episode_id → get entity_refs
3862    /// 2. For each pair of entities, find their RelationshipEdge
3863    /// 3. Call strengthen() on each edge (Hebbian boost + LTP detection + tier promotion)
3864    /// 4. Batch write all updates
3865    pub fn strengthen_episode_entity_edges(&self, episode_id: &Uuid) -> Result<usize> {
3866        let episode = match self.get_episode(episode_id) {
3867            Ok(Some(ep)) => ep,
3868            Ok(None) => return Ok(0),
3869            Err(_) => return Ok(0),
3870        };
3871
3872        if episode.entity_refs.len() < 2 {
3873            return Ok(0);
3874        }
3875
3876        let _guard = self
3877            .synapse_update_lock
3878            .try_lock_for(std::time::Duration::from_secs(5))
3879            .ok_or_else(|| {
3880                anyhow::anyhow!("synapse_update_lock timeout in strengthen_episode_entity_edges")
3881            })?;
3882        let mut batch = WriteBatch::default();
3883        let mut strengthened = 0;
3884
3885        // Iterate over unique entity pairs
3886        let refs = &episode.entity_refs;
3887        let max_pairs = refs.len().min(20); // Cap to avoid O(n²) on large episodes
3888        for i in 0..max_pairs {
3889            for j in (i + 1)..max_pairs {
3890                let entity_a = &refs[i];
3891                let entity_b = &refs[j];
3892
3893                // Find existing edge between this entity pair
3894                if let Ok(Some(mut edge)) = self.find_edge_between_entities(entity_a, entity_b) {
3895                    if edge.invalidated_at.is_some() {
3896                        continue;
3897                    }
3898                    let _ = edge.strengthen();
3899                    let key = edge.uuid.as_bytes();
3900                    if let Ok(value) =
3901                        bincode::serde::encode_to_vec(&edge, bincode::config::standard())
3902                    {
3903                        batch.put_cf(self.relationships_cf(), key, value);
3904                        strengthened += 1;
3905                    }
3906                }
3907                // Don't create new edges — only strengthen existing ones from NER
3908            }
3909        }
3910
3911        if strengthened > 0 {
3912            self.db.write(batch)?;
3913            tracing::debug!(
3914                "Strengthened {} entity-entity edges for episode {}",
3915                strengthened,
3916                &episode_id.to_string()[..8]
3917            );
3918        }
3919
3920        Ok(strengthened)
3921    }
3922
3923    /// Get average Hebbian strength for a memory based on its entity relationships
3924    ///
3925    /// This looks up the entities referenced by the memory and averages their
3926    /// relationship strengths in the graph. Used for composite relevance scoring.
3927    ///
3928    /// The algorithm:
3929    /// 1. Look up memory's EpisodicNode (memory_id.0 == episode UUID)
3930    /// 2. Get entity_refs from the episode
3931    /// 3. For each entity, get relationships using get_entity_relationships
3932    /// 4. Filter to edges where both endpoints are in the memory's entity set
3933    /// 5. Return average effective_strength of these intra-memory edges
3934    ///
3935    /// Returns 0.5 (neutral) if no entities or relationships found.
3936    pub fn get_memory_hebbian_strength(&self, memory_id: &crate::memory::MemoryId) -> Option<f32> {
3937        // 1. Look up EpisodicNode for this memory (memory_id.0 == episode UUID)
3938        let episode = match self.get_episode(&memory_id.0) {
3939            Ok(Some(ep)) => ep,
3940            Ok(None) => return Some(0.5), // No episode found - neutral
3941            Err(_) => return Some(0.5),   // Error - neutral fallback
3942        };
3943
3944        // 2. Get entity references from the episode
3945        if episode.entity_refs.is_empty() {
3946            return Some(0.5); // No entities - neutral
3947        }
3948
3949        // Build a set of entity UUIDs for fast lookup
3950        let entity_set: std::collections::HashSet<Uuid> =
3951            episode.entity_refs.iter().cloned().collect();
3952
3953        // 3. Collect all intra-memory relationship strengths
3954        let mut strengths: Vec<f32> = Vec::new();
3955        let mut seen_edges: std::collections::HashSet<Uuid> = std::collections::HashSet::new();
3956
3957        const MAX_EDGES_PER_ENTITY: usize = 50; // Limit per entity for Hebbian lookup
3958        for entity_uuid in &episode.entity_refs {
3959            if let Ok(edges) =
3960                self.get_entity_relationships_limited(entity_uuid, Some(MAX_EDGES_PER_ENTITY))
3961            {
3962                for edge in edges {
3963                    // Skip if already processed (edges are bidirectional in lookup)
3964                    if seen_edges.contains(&edge.uuid) {
3965                        continue;
3966                    }
3967                    seen_edges.insert(edge.uuid);
3968
3969                    // 4. Only count edges where BOTH endpoints are in this memory's entities
3970                    if entity_set.contains(&edge.from_entity)
3971                        && entity_set.contains(&edge.to_entity)
3972                    {
3973                        // Skip invalidated edges
3974                        if edge.invalidated_at.is_some() {
3975                            continue;
3976                        }
3977                        // Use effective_strength which applies time-based decay
3978                        strengths.push(edge.effective_strength());
3979                    }
3980                }
3981            }
3982        }
3983
3984        // 5. Return average strength, or neutral if no intra-memory edges
3985        if strengths.is_empty() {
3986            Some(0.5)
3987        } else {
3988            let avg = strengths.iter().sum::<f32>() / strengths.len() as f32;
3989            Some(avg)
3990        }
3991    }
3992
3993    /// Apply decay to a synapse
3994    ///
3995    /// Returns true if the synapse should be pruned (non-potentiated and below threshold)
3996    ///
3997    /// Uses a mutex to prevent race conditions during concurrent updates (SHO-64).
3998    pub fn decay_synapse(&self, edge_uuid: &Uuid) -> Result<bool> {
3999        // Lock to prevent concurrent read-modify-write race conditions
4000        let _guard = self
4001            .synapse_update_lock
4002            .try_lock_for(std::time::Duration::from_secs(5))
4003            .ok_or_else(|| anyhow::anyhow!("synapse_update_lock timeout in decay_synapse"))?;
4004
4005        if let Some(mut edge) = self.get_relationship(edge_uuid)? {
4006            let should_prune = edge.decay();
4007
4008            let key = edge.uuid.as_bytes();
4009            let value = bincode::serde::encode_to_vec(&edge, bincode::config::standard())?;
4010            self.db.put_cf(self.relationships_cf(), key, value)?;
4011
4012            return Ok(should_prune);
4013        }
4014
4015        Ok(false)
4016    }
4017
4018    /// Batch decay multiple synapses atomically
4019    ///
4020    /// Returns a vector of edge UUIDs that should be pruned.
4021    pub fn batch_decay_synapses(&self, edge_uuids: &[Uuid]) -> Result<Vec<Uuid>> {
4022        if edge_uuids.is_empty() {
4023            return Ok(Vec::new());
4024        }
4025
4026        // Single lock acquisition for entire batch, with timeout
4027        let _guard = self
4028            .synapse_update_lock
4029            .try_lock_for(std::time::Duration::from_secs(5))
4030            .ok_or_else(|| {
4031                anyhow::anyhow!("synapse_update_lock timeout in batch_decay_synapses")
4032            })?;
4033
4034        let mut batch = WriteBatch::default();
4035        let mut to_prune = Vec::new();
4036
4037        for edge_uuid in edge_uuids {
4038            if let Some(mut edge) = self.get_relationship(edge_uuid)? {
4039                let should_prune = edge.decay();
4040
4041                let key = edge.uuid.as_bytes();
4042                match bincode::serde::encode_to_vec(&edge, bincode::config::standard()) {
4043                    Ok(value) => {
4044                        batch.put_cf(self.relationships_cf(), key, value);
4045                        if should_prune {
4046                            to_prune.push(*edge_uuid);
4047                        }
4048                    }
4049                    Err(e) => {
4050                        tracing::debug!("Failed to serialize edge {}: {}", edge_uuid, e);
4051                    }
4052                }
4053            }
4054        }
4055
4056        // Atomic write of all updates
4057        self.db.write(batch)?;
4058
4059        Ok(to_prune)
4060    }
4061
4062    /// Apply decay to already-loaded edges in-place, avoiding double deserialization.
4063    ///
4064    /// Mutates edges directly, serializes results into a WriteBatch, and returns
4065    /// the UUIDs of edges that should be pruned. Used by `apply_decay()` which
4066    /// already has the full edge list from `get_all_relationships()`.
4067    fn batch_decay_edges_in_place(&self, edges: &mut [RelationshipEdge]) -> Result<Vec<Uuid>> {
4068        if edges.is_empty() {
4069            return Ok(Vec::new());
4070        }
4071
4072        let _guard = self
4073            .synapse_update_lock
4074            .try_lock_for(std::time::Duration::from_secs(5))
4075            .ok_or_else(|| {
4076                anyhow::anyhow!("synapse_update_lock timeout in batch_decay_edges_in_place")
4077            })?;
4078        let mut batch = WriteBatch::default();
4079        let mut to_prune = Vec::new();
4080
4081        for edge in edges.iter_mut() {
4082            let strength_before = edge.strength;
4083            let should_prune = edge.decay();
4084
4085            // Only write back edges whose strength actually changed (or need pruning).
4086            // With 300s maintenance intervals, most edges won't have meaningful decay,
4087            // so this reduces the WriteBatch from ~12MB (all 34k edges) to ~150KB.
4088            if should_prune || (edge.strength - strength_before).abs() > f32::EPSILON {
4089                let key = edge.uuid.as_bytes();
4090                match bincode::serde::encode_to_vec(&*edge, bincode::config::standard()) {
4091                    Ok(value) => {
4092                        batch.put_cf(self.relationships_cf(), key, value);
4093                        if should_prune {
4094                            to_prune.push(edge.uuid);
4095                        }
4096                    }
4097                    Err(e) => {
4098                        tracing::debug!("Failed to serialize edge {}: {}", edge.uuid, e);
4099                    }
4100                }
4101            }
4102        }
4103
4104        self.db.write(batch)?;
4105        Ok(to_prune)
4106    }
4107
4108    /// Apply decay to all synapses and prune weak edges (AUD-2)
4109    ///
4110    /// Called during maintenance cycle to:
4111    /// 1. Apply time-based decay to all edge strengths
4112    /// 2. Remove edges that have decayed below threshold
4113    /// 3. Detect orphaned entities (entities that lost all their edges)
4114    ///
4115    /// Returns a `GraphDecayResult` with pruned count and orphaned entity/memory IDs
4116    /// for Direction 2 coupling (edge pruning → orphan detection).
4117    pub fn apply_decay(&self) -> Result<crate::memory::types::GraphDecayResult> {
4118        // Get all edges (need full data for orphan tracking)
4119        let mut all_edges = self.get_all_relationships()?;
4120
4121        if all_edges.is_empty() {
4122            return Ok(crate::memory::types::GraphDecayResult::default());
4123        }
4124
4125        // Apply decay in-place on already-deserialized edges (avoids double deserialization)
4126        let to_prune = self.batch_decay_edges_in_place(&mut all_edges)?;
4127
4128        if to_prune.is_empty() {
4129            return Ok(crate::memory::types::GraphDecayResult::default());
4130        }
4131
4132        // Collect entity UUIDs from edges being pruned (candidates for orphan status)
4133        let pruned_set: std::collections::HashSet<Uuid> = to_prune.iter().copied().collect();
4134        let mut orphan_candidates: std::collections::HashSet<Uuid> =
4135            std::collections::HashSet::new();
4136        for edge in &all_edges {
4137            if pruned_set.contains(&edge.uuid) {
4138                orphan_candidates.insert(edge.from_entity);
4139                orphan_candidates.insert(edge.to_entity);
4140            }
4141        }
4142
4143        // Delete pruned edges
4144        let mut pruned_count = 0;
4145        for edge_uuid in &to_prune {
4146            if self.delete_relationship(edge_uuid)? {
4147                pruned_count += 1;
4148            }
4149        }
4150
4151        // Check which candidate entities became orphaned (lost ALL edges)
4152        // Delete orphaned entities to prevent stale index pollution
4153        let mut orphaned_entity_ids = Vec::new();
4154        for entity_uuid in &orphan_candidates {
4155            let remaining = self.get_entity_relationships(entity_uuid)?;
4156            if remaining.is_empty() {
4157                orphaned_entity_ids.push(entity_uuid.to_string());
4158                if let Err(e) = self.delete_entity(entity_uuid) {
4159                    tracing::warn!("Failed to delete orphaned entity {}: {}", entity_uuid, e);
4160                }
4161            }
4162        }
4163
4164        if pruned_count > 0 {
4165            tracing::debug!(
4166                "Graph decay: {} edges pruned (of {} total), {} entities orphaned",
4167                pruned_count,
4168                all_edges.len(),
4169                orphaned_entity_ids.len()
4170            );
4171        }
4172
4173        Ok(crate::memory::types::GraphDecayResult {
4174            pruned_count,
4175            orphaned_entity_ids,
4176            orphaned_memory_ids: Vec::new(), // Populated by memory layer via entity→memory lookup
4177        })
4178    }
4179
4180    /// Flush pending maintenance from opportunistic pruning queues.
4181    ///
4182    /// Called every maintenance cycle (5 min). Instead of scanning all 34k+ edges,
4183    /// this only processes edges that were found below prune threshold during normal
4184    /// reads (via `get_entity_relationships_limited`). Typical cost: 0-50 targeted
4185    /// deletes per cycle vs a full CF iterator scan.
4186    pub fn flush_pending_maintenance(&self) -> Result<crate::memory::types::GraphDecayResult> {
4187        // 1. Drain queues (fast — just swaps empty Vecs)
4188        let to_prune: Vec<Uuid> = std::mem::take(&mut *self.pending_prune.lock());
4189        let orphan_candidates: Vec<Uuid> = std::mem::take(&mut *self.pending_orphan_checks.lock());
4190
4191        if to_prune.is_empty() {
4192            return Ok(crate::memory::types::GraphDecayResult::default());
4193        }
4194
4195        // 2. Dedup UUIDs
4196        let to_prune: std::collections::HashSet<Uuid> = to_prune.into_iter().collect();
4197        let orphan_candidates: std::collections::HashSet<Uuid> =
4198            orphan_candidates.into_iter().collect();
4199
4200        // 3. Batch delete pruned edges
4201        let mut pruned_count = 0;
4202        for edge_uuid in &to_prune {
4203            if self.delete_relationship(edge_uuid)? {
4204                pruned_count += 1;
4205            }
4206        }
4207
4208        // 4. Check which candidate entities became orphaned (lost ALL edges)
4209        let mut orphaned_entity_ids = Vec::new();
4210        for entity_uuid in &orphan_candidates {
4211            let remaining = self.get_entity_relationships(entity_uuid)?;
4212            if remaining.is_empty() {
4213                orphaned_entity_ids.push(entity_uuid.to_string());
4214                if let Err(e) = self.delete_entity(entity_uuid) {
4215                    tracing::warn!("Failed to delete orphaned entity {}: {}", entity_uuid, e);
4216                }
4217            }
4218        }
4219
4220        if pruned_count > 0 {
4221            tracing::debug!(
4222                "Lazy pruning: {} edges deleted, {} entities orphaned",
4223                pruned_count,
4224                orphaned_entity_ids.len()
4225            );
4226        }
4227
4228        Ok(crate::memory::types::GraphDecayResult {
4229            pruned_count,
4230            orphaned_entity_ids,
4231            orphaned_memory_ids: Vec::new(),
4232        })
4233    }
4234
4235    /// Get graph statistics - O(1) using atomic counters
4236    pub fn get_stats(&self) -> Result<GraphStats> {
4237        Ok(GraphStats {
4238            entity_count: self.entity_count.load(Ordering::Relaxed),
4239            relationship_count: self.relationship_count.load(Ordering::Relaxed),
4240            episode_count: self.episode_count.load(Ordering::Relaxed),
4241        })
4242    }
4243
4244    /// Get all entities in the graph
4245    pub fn get_all_entities(&self) -> Result<Vec<EntityNode>> {
4246        let mut entities = Vec::new();
4247
4248        let mut read_opts = rocksdb::ReadOptions::default();
4249        read_opts.fill_cache(false);
4250        let iter =
4251            self.db
4252                .iterator_cf_opt(self.entities_cf(), read_opts, rocksdb::IteratorMode::Start);
4253        for (_, value) in iter.flatten() {
4254            if let Ok(entity) = bincode::serde::decode_from_slice::<EntityNode, _>(
4255                &value,
4256                bincode::config::standard(),
4257            )
4258            .map(|(v, _)| v)
4259            {
4260                entities.push(entity);
4261            }
4262        }
4263
4264        // Sort by mention count (most mentioned first)
4265        entities.sort_by(|a, b| b.mention_count.cmp(&a.mention_count));
4266
4267        Ok(entities)
4268    }
4269
4270    /// Get all relationships in the graph
4271    pub fn get_all_relationships(&self) -> Result<Vec<RelationshipEdge>> {
4272        let mut relationships = Vec::new();
4273
4274        // fill_cache(false) prevents this full scan from evicting hot data from
4275        // the block cache. Decompressed blocks are used transiently and freed
4276        // after the iterator advances, reducing peak C++ heap usage.
4277        let mut read_opts = rocksdb::ReadOptions::default();
4278        read_opts.fill_cache(false);
4279        let iter = self.db.iterator_cf_opt(
4280            self.relationships_cf(),
4281            read_opts,
4282            rocksdb::IteratorMode::Start,
4283        );
4284        for (_, value) in iter.flatten() {
4285            if let Ok(edge) = bincode::serde::decode_from_slice::<RelationshipEdge, _>(
4286                &value,
4287                bincode::config::standard(),
4288            )
4289            .map(|(v, _)| v)
4290            {
4291                // Only include non-invalidated relationships
4292                if edge.invalidated_at.is_none() {
4293                    relationships.push(edge);
4294                }
4295            }
4296        }
4297
4298        // Sort by strength (strongest first)
4299        relationships.sort_by(|a, b| b.strength.total_cmp(&a.strength));
4300
4301        Ok(relationships)
4302    }
4303
4304    /// Get the Memory Universe visualization data
4305    /// Returns entities as "stars" with positions based on their relationships,
4306    /// sized by salience, and colored by entity type.
4307    pub fn get_universe(&self) -> Result<MemoryUniverse> {
4308        let entities = self.get_all_entities()?;
4309        let relationships = self.get_all_relationships()?;
4310
4311        // Create entity UUID to index mapping for position calculation
4312        let entity_indices: HashMap<Uuid, usize> = entities
4313            .iter()
4314            .enumerate()
4315            .map(|(i, e)| (e.uuid, i))
4316            .collect();
4317
4318        // Calculate 3D positions using a force-directed layout approximation
4319        // High-salience entities are positioned more centrally
4320        let mut stars: Vec<UniverseStar> = entities
4321            .iter()
4322            .enumerate()
4323            .map(|(i, entity)| {
4324                // Use a spiral galaxy layout with salience affecting radius
4325                // Higher salience = closer to center
4326                let angle = (i as f32) * 2.4; // Golden angle for even distribution
4327                let base_radius = 1.0 - entity.salience; // High salience = small radius
4328                let radius = base_radius * 100.0 + 10.0; // 10-110 range
4329
4330                let x = radius * angle.cos();
4331                let y = radius * angle.sin();
4332                let z = ((i as f32) * 0.1).sin() * 20.0; // Slight z variation
4333
4334                UniverseStar {
4335                    id: entity.uuid.to_string(),
4336                    name: entity.name.clone(),
4337                    entity_type: entity.labels.first().map(|l| l.as_str().to_string()),
4338                    salience: entity.salience,
4339                    mention_count: entity.mention_count,
4340                    is_proper_noun: entity.is_proper_noun,
4341                    position: Position3D { x, y, z },
4342                    color: entity_type_color(entity.labels.first()),
4343                    size: 5.0 + entity.salience * 20.0, // Size 5-25 based on salience
4344                }
4345            })
4346            .collect();
4347
4348        // Apply gravitational forces FIRST, before creating connections
4349        // This ensures connection positions match final star positions
4350        for rel in &relationships {
4351            if let (Some(from_idx), Some(to_idx)) = (
4352                entity_indices.get(&rel.from_entity),
4353                entity_indices.get(&rel.to_entity),
4354            ) {
4355                // Apply small gravitational pull based on effective (decay-aware) strength
4356                let pull_factor = rel.effective_strength() * 0.05;
4357
4358                let from_pos = stars[*from_idx].position.clone();
4359                let to_pos = stars[*to_idx].position.clone();
4360
4361                let dx = (to_pos.x - from_pos.x) * pull_factor;
4362                let dy = (to_pos.y - from_pos.y) * pull_factor;
4363                let dz = (to_pos.z - from_pos.z) * pull_factor;
4364
4365                stars[*from_idx].position.x += dx;
4366                stars[*from_idx].position.y += dy;
4367                stars[*from_idx].position.z += dz;
4368
4369                stars[*to_idx].position.x -= dx;
4370                stars[*to_idx].position.y -= dy;
4371                stars[*to_idx].position.z -= dz;
4372            }
4373        }
4374
4375        // Create gravitational connections AFTER star positions are finalized
4376        // This ensures from_position/to_position match current star positions
4377        let connections: Vec<GravitationalConnection> = relationships
4378            .iter()
4379            .filter_map(|rel| {
4380                let from_idx = entity_indices.get(&rel.from_entity)?;
4381                let to_idx = entity_indices.get(&rel.to_entity)?;
4382
4383                Some(GravitationalConnection {
4384                    id: rel.uuid.to_string(),
4385                    from_id: rel.from_entity.to_string(),
4386                    to_id: rel.to_entity.to_string(),
4387                    relation_type: rel.relation_type.as_str().to_string(),
4388                    strength: rel.effective_strength(),
4389                    from_position: stars[*from_idx].position.clone(),
4390                    to_position: stars[*to_idx].position.clone(),
4391                })
4392            })
4393            .collect();
4394
4395        // Calculate universe bounds
4396        let (min_x, max_x, min_y, max_y, min_z, max_z) = stars.iter().fold(
4397            (f32::MAX, f32::MIN, f32::MAX, f32::MIN, f32::MAX, f32::MIN),
4398            |(min_x, max_x, min_y, max_y, min_z, max_z), star| {
4399                (
4400                    min_x.min(star.position.x),
4401                    max_x.max(star.position.x),
4402                    min_y.min(star.position.y),
4403                    max_y.max(star.position.y),
4404                    min_z.min(star.position.z),
4405                    max_z.max(star.position.z),
4406                )
4407            },
4408        );
4409
4410        Ok(MemoryUniverse {
4411            stars,
4412            connections,
4413            total_entities: entities.len(),
4414            total_connections: relationships.len(),
4415            bounds: UniverseBounds {
4416                min: Position3D {
4417                    x: min_x,
4418                    y: min_y,
4419                    z: min_z,
4420                },
4421                max: Position3D {
4422                    x: max_x,
4423                    y: max_y,
4424                    z: max_z,
4425                },
4426            },
4427        })
4428    }
4429}
4430
4431/// Helper function to get color for entity type
4432fn entity_type_color(label: Option<&EntityLabel>) -> String {
4433    match label {
4434        Some(EntityLabel::Person) => "#FF6B6B".to_string(), // Coral red
4435        Some(EntityLabel::Organization) => "#4ECDC4".to_string(), // Teal
4436        Some(EntityLabel::Location) => "#45B7D1".to_string(), // Sky blue
4437        Some(EntityLabel::Technology) => "#96CEB4".to_string(), // Sage green
4438        Some(EntityLabel::Product) => "#FFEAA7".to_string(), // Soft yellow
4439        Some(EntityLabel::Event) => "#DDA0DD".to_string(),  // Plum
4440        Some(EntityLabel::Skill) => "#98D8C8".to_string(),  // Mint
4441        Some(EntityLabel::Concept) => "#F7DC6F".to_string(), // Gold
4442        Some(EntityLabel::Date) => "#BB8FCE".to_string(),   // Light purple
4443        Some(EntityLabel::Keyword) => "#FF9F43".to_string(), // Orange for YAKE keywords
4444        Some(EntityLabel::Other(_)) => "#AEB6BF".to_string(), // Gray
4445        None => "#AEB6BF".to_string(),                      // Gray default
4446    }
4447}
4448
/// 3D position in the memory universe
///
/// Coordinates are in the arbitrary layout units produced by `get_universe()`
/// (spiral layout with radius 10-110, plus gravitational adjustment).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Position3D {
    /// X coordinate
    pub x: f32,
    /// Y coordinate
    pub y: f32,
    /// Z coordinate
    pub z: f32,
}
4456
/// A star in the memory universe (represents an entity)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UniverseStar {
    /// Entity UUID rendered as a string
    pub id: String,
    /// Entity display name
    pub name: String,
    /// Primary entity label (first of the entity's labels), if any
    pub entity_type: Option<String>,
    /// Entity salience (expected 0-1; higher = closer to the galaxy center)
    pub salience: f32,
    /// How many times the entity has been mentioned
    pub mention_count: usize,
    /// Copied from the entity's `is_proper_noun` flag
    pub is_proper_noun: bool,
    /// Layout position computed by `get_universe()`
    pub position: Position3D,
    /// Hex display color derived from the entity type
    pub color: String,
    /// Render size: 5.0 + salience * 20.0 (i.e. 5-25)
    pub size: f32,
}
4470
/// A gravitational connection between stars (represents a relationship)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GravitationalConnection {
    /// Relationship edge UUID as a string
    pub id: String,
    /// Source entity UUID as a string
    pub from_id: String,
    /// Target entity UUID as a string
    pub to_id: String,
    /// Relationship type name
    pub relation_type: String,
    /// Decay-adjusted strength (`effective_strength()` at build time)
    pub strength: f32,
    /// Final layout position of the source star
    pub from_position: Position3D,
    /// Final layout position of the target star
    pub to_position: Position3D,
}
4482
/// Axis-aligned bounds of the memory universe
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UniverseBounds {
    /// Component-wise minimum over all star positions
    pub min: Position3D,
    /// Component-wise maximum over all star positions
    pub max: Position3D,
}
4489
/// The complete memory universe visualization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryUniverse {
    /// One star per entity
    pub stars: Vec<UniverseStar>,
    /// One connection per relationship whose both endpoints have a star
    pub connections: Vec<GravitationalConnection>,
    /// Total number of entities in the graph
    pub total_entities: usize,
    /// Total number of non-invalidated relationships in the graph
    /// (may exceed `connections.len()` when an endpoint star is missing)
    pub total_connections: usize,
    /// Axis-aligned bounds enclosing all star positions
    pub bounds: UniverseBounds,
}
4499
/// Entity with hop distance from traversal origin
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TraversedEntity {
    /// The entity node reached during traversal
    pub entity: EntityNode,
    /// Number of hops from the starting entity (0 = start entity)
    pub hop_distance: usize,
    /// Decay factor based on hop distance: 1.0 at hop 0, decays with each hop
    pub decay_factor: f32,
}
4509
/// Result of graph traversal
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphTraversal {
    /// Entities found during traversal with hop distance info
    pub entities: Vec<TraversedEntity>,
    /// Relationship edges encountered during the traversal
    pub relationships: Vec<RelationshipEdge>,
}
4517
/// Graph statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphStats {
    /// Number of entity nodes in the graph
    pub entity_count: usize,
    /// Number of relationship edges in the graph
    pub relationship_count: usize,
    /// Number of stored episodes
    pub episode_count: usize,
}
4525
/// Extracted entity with salience information
#[derive(Debug, Clone)]
pub struct ExtractedEntity {
    /// Surface form as it appeared in the text (original casing preserved)
    pub name: String,
    /// Entity type assigned by the rule-based extractor
    pub label: EntityLabel,
    /// Type-derived salience; see `EntityExtractor::calculate_base_salience`
    pub base_salience: f32,
}
4533
/// Simple entity extraction (rule-based NER) with salience detection
///
/// All keyword sets hold lowercase strings; lookups compare against
/// lowercased tokens.
pub struct EntityExtractor {
    /// Common person name indicators
    /// (honorifics such as "mr", "dr" that precede a person's name)
    person_indicators: HashSet<String>,

    /// Common organization indicators (suffixes like Inc, Corp)
    org_indicators: HashSet<String>,

    /// Known organization names (direct matches)
    org_keywords: HashSet<String>,

    /// Known location names (cities, countries, regions)
    location_keywords: HashSet<String>,

    /// Common technology keywords
    tech_keywords: HashSet<String>,

    /// Common words that should NOT be extracted as entities
    /// (stop words that start with capitals at sentence beginning)
    stop_words: HashSet<String>,
}
4555
4556impl EntityExtractor {
4557    pub fn new() -> Self {
4558        let person_indicators: HashSet<String> =
4559            vec!["mr", "mrs", "ms", "dr", "prof", "sir", "madam"]
4560                .into_iter()
4561                .map(String::from)
4562                .collect();
4563
4564        let org_indicators: HashSet<String> = vec![
4565            "inc",
4566            "corp",
4567            "ltd",
4568            "llc",
4569            "company",
4570            "corporation",
4571            "university",
4572            "institute",
4573            "foundation",
4574        ]
4575        .into_iter()
4576        .map(String::from)
4577        .collect();
4578
4579        let tech_keywords: HashSet<String> = vec![
4580            "rust",
4581            "python",
4582            "java",
4583            "javascript",
4584            "typescript",
4585            "react",
4586            "vue",
4587            "angular",
4588            "docker",
4589            "kubernetes",
4590            "aws",
4591            "azure",
4592            "gcp",
4593            "sql",
4594            "nosql",
4595            "mongodb",
4596            "postgresql",
4597            "redis",
4598            "kafka",
4599            "api",
4600            "rest",
4601            "graphql",
4602        ]
4603        .into_iter()
4604        .map(String::from)
4605        .collect();
4606
4607        // Known organization names (global - India-first, then worldwide)
4608        let org_keywords: HashSet<String> = vec![
4609            // Indian Companies - IT/Tech
4610            "tcs",
4611            "infosys",
4612            "wipro",
4613            "hcl",
4614            "tech mahindra",
4615            "cognizant",
4616            "mindtree",
4617            "mphasis",
4618            "ltimindtree",
4619            "persistent",
4620            "zensar",
4621            "cyient",
4622            "hexaware",
4623            "coforge",
4624            "birlasoft",
4625            "sonata software",
4626            "mastek",
4627            "newgen",
4628            // Indian Companies - Startups/Unicorns
4629            "flipkart",
4630            "paytm",
4631            "zomato",
4632            "swiggy",
4633            "ola",
4634            "oyo",
4635            "byju's",
4636            "byjus",
4637            "razorpay",
4638            "phonepe",
4639            "cred",
4640            "zerodha",
4641            "groww",
4642            "upstox",
4643            "policybazaar",
4644            "nykaa",
4645            "meesho",
4646            "udaan",
4647            "delhivery",
4648            "freshworks",
4649            "zoho",
4650            "postman",
4651            "browserstack",
4652            "chargebee",
4653            "clevertap",
4654            "druva",
4655            "hasura",
4656            "innovaccer",
4657            "lenskart",
4658            "mamaearth",
4659            "unacademy",
4660            "vedantu",
4661            "physicswallah",
4662            "dream11",
4663            "mpl",
4664            "winzo",
4665            "slice",
4666            "jupiter",
4667            "fi",
4668            "niyo",
4669            "smallcase",
4670            "koo",
4671            "sharechat",
4672            "dailyhunt",
4673            "pratilipi",
4674            "inshorts",
4675            "rapido",
4676            "urban company",
4677            "dunzo",
4678            "bigbasket",
4679            "grofers",
4680            "blinkit",
4681            "jiomart",
4682            "tata neu",
4683            // Indian Conglomerates
4684            "tata",
4685            "reliance",
4686            "jio",
4687            "adani",
4688            "birla",
4689            "mahindra",
4690            "godrej",
4691            "bajaj",
4692            "hdfc",
4693            "icici",
4694            "kotak",
4695            "axis",
4696            "sbi",
4697            "bharti",
4698            "airtel",
4699            "vodafone",
4700            "idea",
4701            "hero",
4702            "tvs",
4703            "maruti",
4704            "suzuki",
4705            "hyundai",
4706            "kia",
4707            "mg",
4708            "tata motors",
4709            "larsen",
4710            "toubro",
4711            "l&t",
4712            "itc",
4713            "hindustan unilever",
4714            "hul",
4715            "nestle",
4716            "britannia",
4717            "parle",
4718            "amul",
4719            "dabur",
4720            "patanjali",
4721            "emami",
4722            "marico",
4723            // Indian Banks & Finance
4724            "rbi",
4725            "sebi",
4726            "nse",
4727            "bse",
4728            "npci",
4729            "upi",
4730            "bhim",
4731            "paisa",
4732            "mswipe",
4733            "pine labs",
4734            "billdesk",
4735            "ccavenue",
4736            "instamojo",
4737            "cashfree",
4738            // Indian Institutions
4739            "iit",
4740            "iim",
4741            "iisc",
4742            "nit",
4743            "bits",
4744            "isro",
4745            "drdo",
4746            "barc",
4747            "tifr",
4748            "aiims",
4749            "iiser",
4750            "iiit",
4751            "srm",
4752            "vit",
4753            "manipal",
4754            "amity",
4755            "lovely",
4756            // Global Tech Giants
4757            "microsoft",
4758            "google",
4759            "apple",
4760            "amazon",
4761            "meta",
4762            "facebook",
4763            "netflix",
4764            "alphabet",
4765            "youtube",
4766            "instagram",
4767            "whatsapp",
4768            "tiktok",
4769            "snapchat",
4770            "twitter",
4771            "x",
4772            "linkedin",
4773            "pinterest",
4774            "reddit",
4775            "discord",
4776            "telegram",
4777            // Global Enterprise Tech
4778            "salesforce",
4779            "oracle",
4780            "ibm",
4781            "sap",
4782            "vmware",
4783            "dell",
4784            "hp",
4785            "hpe",
4786            "cisco",
4787            "juniper",
4788            "palo alto",
4789            "crowdstrike",
4790            "fortinet",
4791            "splunk",
4792            "servicenow",
4793            "workday",
4794            "atlassian",
4795            "jira",
4796            "confluence",
4797            "trello",
4798            "asana",
4799            "monday",
4800            "notion",
4801            "airtable",
4802            "figma",
4803            "canva",
4804            "miro",
4805            // Global Cloud & Infrastructure
4806            "aws",
4807            "azure",
4808            "gcp",
4809            "digitalocean",
4810            "linode",
4811            "vultr",
4812            "cloudflare",
4813            "akamai",
4814            "fastly",
4815            "vercel",
4816            "netlify",
4817            "heroku",
4818            "render",
4819            "railway",
4820            // Global Hardware/Chip
4821            "intel",
4822            "amd",
4823            "nvidia",
4824            "qualcomm",
4825            "broadcom",
4826            "arm",
4827            "tsmc",
4828            "samsung",
4829            "mediatek",
4830            "apple silicon",
4831            "marvell",
4832            "micron",
4833            "sk hynix",
4834            "western digital",
4835            // Global AI/ML Companies
4836            "openai",
4837            "anthropic",
4838            "deepmind",
4839            "cohere",
4840            "stability",
4841            "midjourney",
4842            "hugging face",
4843            "databricks",
4844            "snowflake",
4845            "palantir",
4846            "c3ai",
4847            "datarobot",
4848            // Global Fintech
4849            "stripe",
4850            "square",
4851            "block",
4852            "paypal",
4853            "venmo",
4854            "wise",
4855            "revolut",
4856            "robinhood",
4857            "coinbase",
4858            "binance",
4859            "kraken",
4860            "gemini",
4861            "ftx",
4862            "blockchain",
4863            "ripple",
4864            // Global Dev Tools
4865            "github",
4866            "gitlab",
4867            "bitbucket",
4868            "jetbrains",
4869            "vscode",
4870            "sublime",
4871            "vim",
4872            "docker",
4873            "kubernetes",
4874            "terraform",
4875            "ansible",
4876            "puppet",
4877            "chef",
4878            // Global Consulting
4879            "accenture",
4880            "deloitte",
4881            "pwc",
4882            "kpmg",
4883            "ey",
4884            "mckinsey",
4885            "bcg",
4886            "bain",
4887            // Global Auto/EV
4888            "tesla",
4889            "rivian",
4890            "lucid",
4891            "nio",
4892            "byd",
4893            "xpeng",
4894            "volkswagen",
4895            "bmw",
4896            "mercedes",
4897            "audi",
4898            "porsche",
4899            "toyota",
4900            "honda",
4901            "nissan",
4902            "ford",
4903            "gm",
4904            // Global Aerospace
4905            "spacex",
4906            "boeing",
4907            "airbus",
4908            "lockheed",
4909            "northrop",
4910            "raytheon",
4911            "nasa",
4912            "esa",
4913            "jaxa",
4914            "isro",
4915            "blue origin",
4916            "virgin galactic",
4917            // Universities - India
4918            "delhi university",
4919            "jnu",
4920            "bhu",
4921            "amu",
4922            "jadavpur",
4923            "presidency",
4924            "st stephens",
4925            "loyola",
4926            "xavier",
4927            "symbiosis",
4928            "nmims",
4929            "sp jain",
4930            "xlri",
4931            "fms",
4932            "iift",
4933            "mdi",
4934            "great lakes",
4935            "ism dhanbad",
4936            // Universities - Global
4937            "mit",
4938            "stanford",
4939            "harvard",
4940            "yale",
4941            "princeton",
4942            "caltech",
4943            "berkeley",
4944            "oxford",
4945            "cambridge",
4946            "imperial",
4947            "eth zurich",
4948            "epfl",
4949            "tsinghua",
4950            "peking",
4951            "nus",
4952            "nanyang",
4953            "kaist",
4954            "university of tokyo",
4955            "melbourne",
4956        ]
4957        .into_iter()
4958        .map(String::from)
4959        .collect();
4960
4961        // Known location names (global - India-first, then worldwide)
4962        let location_keywords: HashSet<String> = vec![
4963            // Indian Metro Cities
4964            "mumbai",
4965            "delhi",
4966            "bangalore",
4967            "bengaluru",
4968            "hyderabad",
4969            "chennai",
4970            "kolkata",
4971            "pune",
4972            "ahmedabad",
4973            "surat",
4974            "jaipur",
4975            "lucknow",
4976            // Indian Tier-1 Cities
4977            "kochi",
4978            "cochin",
4979            "thiruvananthapuram",
4980            "trivandrum",
4981            "coimbatore",
4982            "madurai",
4983            "visakhapatnam",
4984            "vizag",
4985            "vijayawada",
4986            "nagpur",
4987            "indore",
4988            "bhopal",
4989            "chandigarh",
4990            "mohali",
4991            "panchkula",
4992            "noida",
4993            "gurgaon",
4994            "gurugram",
4995            "faridabad",
4996            "ghaziabad",
4997            "greater noida",
4998            "dwarka",
4999            // Indian Tier-2 Cities
5000            "mysore",
5001            "mangalore",
5002            "hubli",
5003            "belgaum",
5004            "nashik",
5005            "aurangabad",
5006            "rajkot",
5007            "vadodara",
5008            "baroda",
5009            "gandhinagar",
5010            "kanpur",
5011            "varanasi",
5012            "allahabad",
5013            "prayagraj",
5014            "agra",
5015            "meerut",
5016            "dehradun",
5017            "rishikesh",
5018            "haridwar",
5019            "amritsar",
5020            "jalandhar",
5021            "ludhiana",
5022            "shimla",
5023            "manali",
5024            "dharamshala",
5025            "jammu",
5026            "srinagar",
5027            "ranchi",
5028            "jamshedpur",
5029            "patna",
5030            "guwahati",
5031            "shillong",
5032            "imphal",
5033            "kohima",
5034            "gangtok",
5035            "darjeeling",
5036            "bhubaneswar",
5037            "cuttack",
5038            "rourkela",
5039            "raipur",
5040            "bilaspur",
5041            // Indian States & UTs
5042            "maharashtra",
5043            "karnataka",
5044            "tamil nadu",
5045            "telangana",
5046            "andhra pradesh",
5047            "kerala",
5048            "gujarat",
5049            "rajasthan",
5050            "uttar pradesh",
5051            "madhya pradesh",
5052            "west bengal",
5053            "bihar",
5054            "odisha",
5055            "jharkhand",
5056            "chhattisgarh",
5057            "punjab",
5058            "haryana",
5059            "himachal pradesh",
5060            "uttarakhand",
5061            "goa",
5062            "assam",
5063            "meghalaya",
5064            "manipur",
5065            "nagaland",
5066            "tripura",
5067            "mizoram",
5068            "arunachal pradesh",
5069            "sikkim",
5070            "jammu and kashmir",
5071            "ladakh",
5072            // Indian Regions
5073            "silicon valley of india",
5074            "electronic city",
5075            "whitefield",
5076            "marathahalli",
5077            "koramangala",
5078            "indiranagar",
5079            "hsr layout",
5080            "jayanagar",
5081            "malleshwaram",
5082            "bandra",
5083            "andheri",
5084            "powai",
5085            "lower parel",
5086            "bkc",
5087            "navi mumbai",
5088            "thane",
5089            "connaught place",
5090            "nehru place",
5091            "saket",
5092            "cyber city",
5093            "dlf",
5094            "hitech city",
5095            "madhapur",
5096            "gachibowli",
5097            "ecr",
5098            "omr",
5099            "it corridor",
5100            // Asian Cities
5101            "singapore",
5102            "hong kong",
5103            "tokyo",
5104            "osaka",
5105            "seoul",
5106            "busan",
5107            "beijing",
5108            "shanghai",
5109            "shenzhen",
5110            "guangzhou",
5111            "hangzhou",
5112            "taipei",
5113            "bangkok",
5114            "kuala lumpur",
5115            "jakarta",
5116            "manila",
5117            "ho chi minh",
5118            "hanoi",
5119            "dubai",
5120            "abu dhabi",
5121            "doha",
5122            "riyadh",
5123            "tel aviv",
5124            "istanbul",
5125            // European Cities
5126            "london",
5127            "paris",
5128            "berlin",
5129            "munich",
5130            "frankfurt",
5131            "amsterdam",
5132            "rotterdam",
5133            "brussels",
5134            "zurich",
5135            "geneva",
5136            "vienna",
5137            "prague",
5138            "warsaw",
5139            "budapest",
5140            "barcelona",
5141            "madrid",
5142            "milan",
5143            "rome",
5144            "lisbon",
5145            "dublin",
5146            "edinburgh",
5147            "manchester",
5148            "stockholm",
5149            "oslo",
5150            "helsinki",
5151            "copenhagen",
5152            "athens",
5153            "moscow",
5154            "st petersburg",
5155            // North American Cities
5156            "new york",
5157            "los angeles",
5158            "san francisco",
5159            "seattle",
5160            "boston",
5161            "chicago",
5162            "austin",
5163            "denver",
5164            "portland",
5165            "miami",
5166            "atlanta",
5167            "dallas",
5168            "houston",
5169            "phoenix",
5170            "san diego",
5171            "san jose",
5172            "oakland",
5173            "palo alto",
5174            "mountain view",
5175            "cupertino",
5176            "menlo park",
5177            "redwood city",
5178            "washington dc",
5179            "philadelphia",
5180            "detroit",
5181            "toronto",
5182            "vancouver",
5183            "montreal",
5184            "calgary",
5185            "ottawa",
5186            "mexico city",
5187            "guadalajara",
5188            // South American Cities
5189            "sao paulo",
5190            "rio de janeiro",
5191            "buenos aires",
5192            "santiago",
5193            "bogota",
5194            "lima",
5195            "medellin",
5196            "cartagena",
5197            // African Cities
5198            "johannesburg",
5199            "cape town",
5200            "lagos",
5201            "nairobi",
5202            "cairo",
5203            "casablanca",
5204            "accra",
5205            "addis ababa",
5206            "kigali",
5207            // Australian/NZ Cities
5208            "sydney",
5209            "melbourne",
5210            "brisbane",
5211            "perth",
5212            "auckland",
5213            "wellington",
5214            // Countries - Asia
5215            "india",
5216            "china",
5217            "japan",
5218            "south korea",
5219            "korea",
5220            "taiwan",
5221            "singapore",
5222            "malaysia",
5223            "thailand",
5224            "vietnam",
5225            "indonesia",
5226            "philippines",
5227            "bangladesh",
5228            "pakistan",
5229            "sri lanka",
5230            "nepal",
5231            "bhutan",
5232            "myanmar",
5233            "cambodia",
5234            "laos",
5235            // Countries - Middle East
5236            "uae",
5237            "emirates",
5238            "saudi arabia",
5239            "qatar",
5240            "bahrain",
5241            "kuwait",
5242            "oman",
5243            "israel",
5244            "turkey",
5245            "iran",
5246            "iraq",
5247            "jordan",
5248            "lebanon",
5249            "egypt",
5250            // Countries - Europe
5251            "uk",
5252            "united kingdom",
5253            "britain",
5254            "england",
5255            "scotland",
5256            "wales",
5257            "ireland",
5258            "france",
5259            "germany",
5260            "italy",
5261            "spain",
5262            "portugal",
5263            "netherlands",
5264            "belgium",
5265            "switzerland",
5266            "austria",
5267            "poland",
5268            "czech",
5269            "hungary",
5270            "romania",
5271            "bulgaria",
5272            "greece",
5273            "sweden",
5274            "norway",
5275            "finland",
5276            "denmark",
5277            "russia",
5278            "ukraine",
5279            // Countries - Americas
5280            "usa",
5281            "united states",
5282            "america",
5283            "canada",
5284            "mexico",
5285            "brazil",
5286            "argentina",
5287            "chile",
5288            "colombia",
5289            "peru",
5290            "venezuela",
5291            // Countries - Africa/Oceania
5292            "south africa",
5293            "nigeria",
5294            "kenya",
5295            "ghana",
5296            "ethiopia",
5297            "rwanda",
5298            "australia",
5299            "new zealand",
5300            // Famous Tech Hubs
5301            "silicon valley",
5302            "bay area",
5303            "wall street",
5304            "tech city",
5305            "shoreditch",
5306            "station f",
5307            "blockchain island",
5308            "crypto valley",
5309            "startup nation",
5310            "innovation district",
5311            "tech park",
5312            "it park",
5313            "sez",
5314            "special economic zone",
5315        ]
5316        .into_iter()
5317        .map(String::from)
5318        .collect();
5319
5320        // Stop words: common words that appear capitalized at sentence start
5321        // These aren't named entities even when capitalized
5322        let stop_words: HashSet<String> = vec![
5323            // Articles & pronouns
5324            "the", "a", "an", "this", "that", "these", "those", "i", "we", "you", "he", "she", "it",
5325            "they", // Common verbs (appear at sentence start)
5326            "is", "are", "was", "were", "been", "being", "have", "has", "had", "do", "does", "did",
5327            "will", "would", "could", "should", "may", "might", // Question words
5328            "if", "when", "where", "what", "why", "how",
5329        ]
5330        .into_iter()
5331        .map(String::from)
5332        .collect();
5333
5334        Self {
5335            person_indicators,
5336            org_indicators,
5337            org_keywords,
5338            location_keywords,
5339            tech_keywords,
5340            stop_words,
5341        }
5342    }
5343
5344    /// Calculate base salience for an entity based on its type and detection confidence
5345    ///
5346    /// Salience values by entity type:
5347    /// - Person: 0.8 (highest - people are key context)
5348    /// - Organization/Product: 0.7
5349    /// - Location/Technology/Event: 0.6
5350    /// - Skill: 0.5
5351    /// - Concept: 0.4
5352    /// - Date/Other: 0.3
5353    ///
5354    /// Proper nouns receive a 20% boost (capped at 1.0).
5355    pub fn calculate_base_salience(label: &EntityLabel, is_proper_noun: bool) -> f32 {
5356        let type_salience = match label {
5357            EntityLabel::Person => 0.8,       // People are highly salient
5358            EntityLabel::Organization => 0.7, // Organizations are important
5359            EntityLabel::Location => 0.6,     // Locations matter for context
5360            EntityLabel::Technology => 0.6,   // Tech keywords matter for dev context
5361            EntityLabel::Product => 0.7,      // Products are specific entities
5362            EntityLabel::Event => 0.6,        // Events are temporal anchors
5363            EntityLabel::Skill => 0.5,        // Skills are somewhat important
5364            EntityLabel::Keyword => 0.55,     // YAKE keywords - discriminative terms
5365            EntityLabel::Concept => 0.4,      // Concepts are more generic
5366            EntityLabel::Date => 0.3,         // Dates are structural, not salient
5367            EntityLabel::Other(_) => 0.3,     // Unknown types get low salience
5368        };
5369
5370        // Proper nouns get a 20% boost
5371        if is_proper_noun {
5372            (type_salience * 1.2_f32).min(1.0_f32)
5373        } else {
5374            type_salience
5375        }
5376    }
5377
5378    /// Check if a word is likely a proper noun (not just capitalized at sentence start)
5379    fn is_likely_proper_noun(&self, word: &str, position: usize, prev_char: Option<char>) -> bool {
5380        // If it's not at position 0 and is capitalized, it's likely a proper noun
5381        if position > 0 {
5382            return true;
5383        }
5384
5385        // At position 0, check if previous character was punctuation (sentence start)
5386        // If previous char was '.', '!', '?' then this might just be sentence capitalization
5387        if let Some(c) = prev_char {
5388            if c == '.' || c == '!' || c == '?' {
5389                // It's at sentence start - could be either
5390                // Check if it's a common word
5391                let lower = word.to_lowercase();
5392                return !self.stop_words.contains(&lower);
5393            }
5394        }
5395
5396        // Default to proper noun for capitalized words
5397        true
5398    }
5399
5400    /// Extract entities from text with salience information
5401    pub fn extract_with_salience(&self, text: &str) -> Vec<ExtractedEntity> {
5402        let mut entities = Vec::new();
5403        let mut seen = HashSet::new();
5404        let mut skip_until_index = 0; // For skipping sub-spans of multi-word entities
5405
5406        // Split into words and detect capitalized sequences
5407        let words: Vec<&str> = text.split_whitespace().collect();
5408
5409        for (i, word) in words.iter().enumerate() {
5410            // Skip if this word is part of a multi-word entity we already extracted
5411            if i < skip_until_index {
5412                continue;
5413            }
5414
5415            let clean_word = word.trim_matches(|c: char| !c.is_alphanumeric());
5416
5417            if clean_word.is_empty() {
5418                continue;
5419            }
5420
5421            let lower = clean_word.to_lowercase();
5422
5423            // Skip common stop words
5424            if self.stop_words.contains(&lower) {
5425                continue;
5426            }
5427
5428            // Check for known organization keywords (direct match, min 2 chars to filter "x" noise)
5429            if lower.len() >= 2 && self.org_keywords.contains(&lower) && !seen.contains(&lower) {
5430                let entity = ExtractedEntity {
5431                    name: clean_word.to_string(),
5432                    label: EntityLabel::Organization,
5433                    base_salience: Self::calculate_base_salience(&EntityLabel::Organization, true),
5434                };
5435                entities.push(entity);
5436                seen.insert(lower.clone());
5437                continue;
5438            }
5439
5440            // Check for known location keywords (direct match)
5441            if self.location_keywords.contains(&lower) && !seen.contains(&lower) {
5442                let entity = ExtractedEntity {
5443                    name: clean_word.to_string(),
5444                    label: EntityLabel::Location,
5445                    base_salience: Self::calculate_base_salience(&EntityLabel::Location, true),
5446                };
5447                entities.push(entity);
5448                seen.insert(lower.clone());
5449                continue;
5450            }
5451
5452            // Check for technology keywords (always proper nouns in tech context)
5453            if self.tech_keywords.contains(&lower) && !seen.contains(&lower) {
5454                let entity = ExtractedEntity {
5455                    name: clean_word.to_string(),
5456                    label: EntityLabel::Technology,
5457                    base_salience: Self::calculate_base_salience(&EntityLabel::Technology, true),
5458                };
5459                entities.push(entity);
5460                seen.insert(lower.clone());
5461                continue;
5462            }
5463
5464            // Check for capitalized words (potential entities)
5465            if clean_word
5466                .chars()
5467                .next()
5468                .map(|c| c.is_uppercase())
5469                .unwrap_or(false)
5470            {
5471                let mut entity_name = clean_word.to_string();
5472                let mut entity_label = EntityLabel::Other("Unknown".to_string());
5473
5474                // Determine previous character for proper noun detection
5475                let prev_char = if i > 0 {
5476                    words[i - 1].chars().last()
5477                } else {
5478                    None
5479                };
5480
5481                let is_proper = self.is_likely_proper_noun(clean_word, i, prev_char);
5482
5483                // Check for person indicators
5484                if i > 0
5485                    && self
5486                        .person_indicators
5487                        .contains(&words[i - 1].to_lowercase())
5488                {
5489                    entity_label = EntityLabel::Person;
5490                }
5491
5492                // Check for multi-word capitalized sequences.
5493                // Include capitalized stop words (Of, The, And) in entity names
5494                // to preserve proper nouns like "Bank Of America", "University Of Delhi".
5495                let mut j = i + 1;
5496                while j < words.len()
5497                    && words[j]
5498                        .chars()
5499                        .next()
5500                        .map(|c| c.is_uppercase())
5501                        .unwrap_or(false)
5502                {
5503                    let next_word = words[j].trim_matches(|c: char| !c.is_alphanumeric());
5504                    entity_name.push(' ');
5505                    entity_name.push_str(next_word);
5506                    j += 1;
5507                }
5508
5509                // Set skip_until_index to avoid extracting sub-spans
5510                // e.g., if we extracted "John Smith", skip "Smith" on next iteration
5511                if j > i + 1 {
5512                    skip_until_index = j;
5513                }
5514
5515                let entity_name_lower = entity_name.to_lowercase();
5516
5517                // Check multi-word entity against known lists
5518                if self.org_keywords.contains(&entity_name_lower) {
5519                    entity_label = EntityLabel::Organization;
5520                } else if self.location_keywords.contains(&entity_name_lower) {
5521                    entity_label = EntityLabel::Location;
5522                }
5523
5524                // Check for organization indicators (suffixes)
5525                if matches!(entity_label, EntityLabel::Other(_)) {
5526                    for word in entity_name.split_whitespace() {
5527                        if self.org_indicators.contains(&word.to_lowercase()) {
5528                            entity_label = EntityLabel::Organization;
5529                            break;
5530                        }
5531                    }
5532                }
5533
5534                // Only extract entities we have evidence for
5535                // Don't guess on single unknown capitalized words - they're often noise
5536                if matches!(entity_label, EntityLabel::Other(_)) {
5537                    if entity_name.contains(' ') {
5538                        // Multi-word capitalized sequences (like "John Smith", "New York")
5539                        // are likely proper names — use Concept as safe default
5540                        // Concept(0.4) + proper noun boost(1.2x) = 0.48 salience
5541                        // Hebbian strengthening will promote genuinely important entities
5542                        entity_label = EntityLabel::Concept;
5543                    } else {
5544                        // Single capitalized word not in any keyword list
5545                        // Skip it - we don't have enough evidence it's a real entity
5546                        // The neural NER model handles these cases properly
5547                        continue;
5548                    }
5549                }
5550
5551                let entity_key = entity_name_lower;
5552                if !seen.contains(&entity_key) {
5553                    let base_salience = Self::calculate_base_salience(&entity_label, is_proper);
5554                    let entity = ExtractedEntity {
5555                        name: entity_name,
5556                        label: entity_label,
5557                        base_salience,
5558                    };
5559                    entities.push(entity);
5560                    seen.insert(entity_key);
5561                }
5562            }
5563        }
5564
5565        // HYBRID APPROACH: POS-based extraction + YAKE importance scoring
5566        //
5567        // 1. POS extraction ensures ALL content words are captured (no frequency bias)
5568        // 2. YAKE provides discriminativeness scores for boosting rare/important terms
5569        //
5570        // This solves the "sunrise problem": YAKE alone buries rare words at position 41,
5571        // but POS ensures "sunrise" is extracted, and YAKE boosts its salience.
5572        use crate::embeddings::keywords::{KeywordConfig, KeywordExtractor};
5573        use crate::memory::query_parser::{extract_chunks, PosTag};
5574
5575        // Get YAKE importance scores for discriminative weighting
5576        let kw_config = KeywordConfig {
5577            max_keywords: 100, // Get many keywords for lookup
5578            ngrams: 1,
5579            min_length: 3,
5580            ..Default::default()
5581        };
5582        let kw_extractor = KeywordExtractor::with_config(kw_config);
5583        let keywords = kw_extractor.extract(text);
5584
5585        // Build a lookup map: term -> importance (0.0-1.0)
5586        let yake_importance: std::collections::HashMap<String, f32> = keywords
5587            .into_iter()
5588            .map(|kw| (kw.text.to_lowercase(), kw.importance))
5589            .collect();
5590
5591        // POS-based extraction for comprehensive coverage
5592        let chunk_extraction = extract_chunks(text);
5593
5594        // Add all proper nouns (these are likely named entities we might have missed)
5595        for proper_noun in &chunk_extraction.proper_nouns {
5596            let term_lower = proper_noun.to_lowercase();
5597            if !seen.contains(&term_lower) && term_lower.len() >= 3 {
5598                // Boost salience if YAKE identified this as discriminative
5599                let yake_boost = yake_importance.get(&term_lower).copied().unwrap_or(0.0);
5600                let entity = ExtractedEntity {
5601                    name: proper_noun.clone(),
5602                    label: EntityLabel::Person,
5603                    base_salience: 0.7 + (yake_boost * 0.2), // 0.7-0.9
5604                };
5605                entities.push(entity);
5606                seen.insert(term_lower);
5607            }
5608        }
5609
5610        // Add all content words as Keyword entities
5611        // POS ensures comprehensive extraction, YAKE boosts discriminative terms
5612        for chunk in &chunk_extraction.chunks {
5613            for word in &chunk.words {
5614                let term_lower = word.text.to_lowercase();
5615
5616                // Skip if already extracted or too short
5617                if seen.contains(&term_lower) || term_lower.len() < 4 {
5618                    continue;
5619                }
5620
5621                // Skip stop words
5622                if self.stop_words.contains(&term_lower) {
5623                    continue;
5624                }
5625
5626                // Base salience by POS, boosted by YAKE importance
5627                let yake_boost = yake_importance.get(&term_lower).copied().unwrap_or(0.0);
5628
5629                let (label, base_salience) = match word.pos {
5630                    PosTag::Noun | PosTag::ProperNoun => {
5631                        // Nouns are most important, start at 0.5
5632                        (EntityLabel::Keyword, 0.5)
5633                    }
5634                    PosTag::Verb => {
5635                        // Verbs connect entities, start at 0.4
5636                        (EntityLabel::Keyword, 0.4)
5637                    }
5638                    PosTag::Adjective => {
5639                        // Adjectives are modifiers, start at 0.35
5640                        (EntityLabel::Keyword, 0.35)
5641                    }
5642                    _ => continue,
5643                };
5644
5645                // Boost by YAKE importance (0.0-0.3 boost based on discriminativeness)
5646                let final_salience = base_salience + (yake_boost * 0.3);
5647
5648                let entity = ExtractedEntity {
5649                    name: word.text.clone(),
5650                    label,
5651                    base_salience: final_salience,
5652                };
5653                entities.push(entity);
5654                seen.insert(term_lower);
5655            }
5656        }
5657
5658        entities
5659    }
5660
5661    /// Extract co-occurrence pairs from text for graph edge creation
5662    ///
5663    /// Returns pairs of (entity1, entity2) that appear in the same sentence.
5664    /// This enables creating edges between words that co-occur, which is critical
5665    /// for multi-hop retrieval (e.g., connecting "Melanie" to "sunrise" when
5666    /// they appear in the same sentence about painting).
5667    pub fn extract_cooccurrence_pairs(&self, text: &str) -> Vec<(String, String)> {
5668        use crate::memory::query_parser::extract_chunks;
5669
5670        let chunk_extraction = extract_chunks(text);
5671        let mut pairs = Vec::new();
5672
5673        // Get all co-occurrence pairs from chunks (same sentence)
5674        for chunk in &chunk_extraction.chunks {
5675            let content_words = chunk.content_words();
5676
5677            // Create pairs between all content words in the same sentence
5678            for i in 0..content_words.len() {
5679                for j in (i + 1)..content_words.len() {
5680                    let w1 = content_words[i].text.to_lowercase();
5681                    let w2 = content_words[j].text.to_lowercase();
5682
5683                    // Skip very short words and stop words
5684                    if w1.len() >= 3
5685                        && w2.len() >= 3
5686                        && !self.stop_words.contains(&w1)
5687                        && !self.stop_words.contains(&w2)
5688                    {
5689                        pairs.push((w1, w2));
5690                    }
5691                }
5692            }
5693        }
5694
5695        pairs
5696    }
5697}
5698
impl Default for EntityExtractor {
    /// Delegates to [`EntityExtractor::new`] so `EntityExtractor::default()`
    /// and an explicit constructor call behave identically.
    fn default() -> Self {
        Self::new()
    }
}
5704
5705#[cfg(test)]
5706mod tests {
5707    use super::*;
5708    use chrono::Duration;
5709
    /// Create a test relationship edge with specified strength and last_activated (L1 tier)
    ///
    /// Convenience wrapper around [`create_test_edge_with_tier`] for tests
    /// that don't care about the tier and accept the L1 working-tier default.
    fn create_test_edge(strength: f32, days_since_activated: i64) -> RelationshipEdge {
        create_test_edge_with_tier(strength, days_since_activated, EdgeTier::L1Working)
    }
5714
    /// Create a test relationship edge with specified strength, last_activated, and tier
    ///
    /// All remaining fields receive neutral defaults: random UUIDs, a generic
    /// `RelatedTo` relation, zero activations, no LTP status, and no recorded
    /// activation timestamps.
    fn create_test_edge_with_tier(
        strength: f32,
        days_since_activated: i64,
        tier: EdgeTier,
    ) -> RelationshipEdge {
        RelationshipEdge {
            uuid: Uuid::new_v4(),
            from_entity: Uuid::new_v4(),
            to_entity: Uuid::new_v4(),
            relation_type: RelationType::RelatedTo,
            strength,
            created_at: Utc::now(),
            valid_at: Utc::now(),
            invalidated_at: None,
            source_episode_id: None,
            context: String::new(),
            // Backdated so decay tests can simulate an edge of arbitrary age.
            last_activated: Utc::now() - Duration::days(days_since_activated),
            activation_count: 0,
            ltp_status: LtpStatus::None,
            activation_timestamps: None,
            tier,
            entity_confidence: None, // PIPE-5: Default for tests
        }
    }
5740
5741    #[test]
5742    fn test_hebbian_strengthen_increases_strength() {
5743        use crate::constants::*;
5744        // Use L2 tier to avoid L1 promotion resetting strength
5745        let mut edge = create_test_edge_with_tier(0.3, 0, EdgeTier::L2Episodic);
5746        let initial_strength = edge.strength;
5747
5748        let _ = edge.strengthen();
5749
5750        // With tier boost (L2 gets 80% of TIER_CO_ACCESS_BOOST), strength should increase
5751        let tier_boost = TIER_CO_ACCESS_BOOST * 0.8;
5752        let expected_boost = (LTP_LEARNING_RATE + tier_boost) * (1.0 - initial_strength);
5753        assert!(
5754            edge.strength > initial_strength,
5755            "Strengthen should increase strength (expected boost {expected_boost})"
5756        );
5757        assert_eq!(edge.activation_count, 1);
5758    }
5759
5760    #[test]
5761    fn test_hebbian_strengthen_asymptotic() {
5762        use crate::constants::*;
5763        // Use L3 tier (no promotion) with high initial strength
5764        let mut edge = create_test_edge_with_tier(0.95, 0, EdgeTier::L3Semantic);
5765
5766        let _ = edge.strengthen();
5767
5768        // High strength should still increase but slowly (asymptotic to 1.0)
5769        // L3 tier boost = TIER_CO_ACCESS_BOOST * 0.5 = 0.075
5770        let tier_boost = TIER_CO_ACCESS_BOOST * 0.5;
5771        let expected_min = 0.95 + (LTP_LEARNING_RATE + tier_boost) * 0.05 - 0.01;
5772        assert!(
5773            edge.strength > expected_min,
5774            "Expected > {expected_min}, got {}",
5775            edge.strength
5776        );
5777        assert!(edge.strength <= 1.0);
5778    }
5779
5780    #[test]
5781    fn test_hebbian_strengthen_formula() {
5782        use crate::constants::*;
5783        // Test: w_new = w_old + (η + tier_boost) × (1 - w_old)
5784        // Use L2 tier (tier_boost = TIER_CO_ACCESS_BOOST * 0.8) at 0.3 to avoid promotion
5785        let mut edge = create_test_edge_with_tier(0.3, 0, EdgeTier::L2Episodic);
5786
5787        let _ = edge.strengthen();
5788
5789        // L2 tier boost = 0.15 * 0.8 = 0.12
5790        // Expected: 0.3 + (0.1 + 0.12) * (1 - 0.3) = 0.3 + 0.22 * 0.7 = 0.454
5791        let tier_boost = TIER_CO_ACCESS_BOOST * 0.8;
5792        let expected = 0.3 + (LTP_LEARNING_RATE + tier_boost) * 0.7;
5793        assert!(
5794            (edge.strength - expected).abs() < 0.001,
5795            "Expected {expected}, got {}",
5796            edge.strength
5797        );
5798    }
5799
5800    #[test]
5801    fn test_ltp_threshold_potentiation() {
5802        let mut edge = create_test_edge(0.5, 0);
5803        assert!(!edge.is_potentiated());
5804
5805        // Strengthen 10 times (LTP_THRESHOLD = 10)
5806        for _ in 0..10 {
5807            let _ = edge.strengthen();
5808        }
5809
5810        assert!(
5811            edge.is_potentiated(),
5812            "Should be potentiated after 10 activations"
5813        );
5814        assert!(
5815            matches!(edge.ltp_status, LtpStatus::Full),
5816            "Should have Full LTP status after 10 activations"
5817        );
5818        assert!(
5819            edge.strength > 0.7,
5820            "Potentiated edge should have bonus strength"
5821        );
5822    }
5823
5824    #[test]
5825    fn test_pipe4_burst_ltp_detection() {
5826        // Create an L2 edge with low strength to avoid early tier promotion
5827        let mut edge = create_test_edge_with_tier(0.22, 0, EdgeTier::L2Episodic);
5828
5829        // Strengthen 5 times (LTP_BURST_THRESHOLD = 5) within 24 hours
5830        for _ in 0..5 {
5831            let _ = edge.strengthen();
5832        }
5833
5834        // Should have burst LTP (5+ activations in 24h)
5835        // Edge may promote to L3 during strengthening, but should keep Burst status
5836        assert!(
5837            matches!(edge.ltp_status, LtpStatus::Burst { .. }),
5838            "Should have Burst LTP after 5 rapid activations, got {:?}",
5839            edge.ltp_status
5840        );
5841    }
5842
5843    #[test]
5844    fn test_pipe4_activation_timestamps_recorded() {
5845        // L2 edges should record activation timestamps
5846        let mut edge = create_test_edge_with_tier(0.22, 0, EdgeTier::L2Episodic);
5847
5848        // Strengthen a few times
5849        for _ in 0..3 {
5850            let _ = edge.strengthen();
5851        }
5852
5853        // Should have recorded timestamps (edge may have promoted to L3, but still tracks)
5854        assert!(
5855            edge.activation_timestamps.is_some(),
5856            "L2+ edge should have activation timestamps"
5857        );
5858        assert_eq!(
5859            edge.activation_timestamps.as_ref().unwrap().len(),
5860            3,
5861            "Should have 3 recorded timestamps"
5862        );
5863    }
5864
5865    #[test]
5866    fn test_pipe4_fresh_l1_no_timestamps() {
5867        // Fresh L1 edges should NOT have activation timestamps
5868        let edge = create_test_edge(0.3, 0);
5869        assert!(matches!(edge.tier, EdgeTier::L1Working));
5870        assert!(
5871            edge.activation_timestamps.is_none(),
5872            "Fresh L1 edges should not have timestamps"
5873        );
5874    }
5875
5876    #[test]
5877    fn test_pipe4_l1_promotes_and_tracks() {
5878        // L1 edges that promote to L2 should start tracking timestamps
5879        let mut edge = create_test_edge(0.3, 0);
5880        assert!(matches!(edge.tier, EdgeTier::L1Working));
5881
5882        // Strengthen until it promotes to L2 (L1_PROMOTION_THRESHOLD = 0.5)
5883        while matches!(edge.tier, EdgeTier::L1Working) {
5884            let _ = edge.strengthen();
5885        }
5886
5887        // After promotion to L2, should start tracking
5888        assert!(
5889            matches!(edge.tier, EdgeTier::L2Episodic),
5890            "Should have promoted to L2"
5891        );
5892        // Timestamps are initialized on promotion
5893        assert!(
5894            edge.activation_timestamps.is_some(),
5895            "L2 edges should track timestamps after promotion"
5896        );
5897    }
5898
5899    #[test]
5900    fn test_pipe4_ltp_status_decay_factors() {
5901        // Test that each LTP status has correct decay factor
5902        use crate::constants::*;
5903
5904        assert_eq!(LtpStatus::None.decay_factor(), 1.0);
5905        assert_eq!(LtpStatus::Weekly.decay_factor(), LTP_WEEKLY_DECAY_FACTOR);
5906        assert_eq!(LtpStatus::Full.decay_factor(), LTP_DECAY_FACTOR);
5907
5908        // Burst factor depends on expiration
5909        let burst = LtpStatus::Burst {
5910            detected_at: Utc::now(),
5911        };
5912        assert_eq!(burst.decay_factor(), LTP_BURST_DECAY_FACTOR);
5913    }
5914
5915    #[test]
5916    fn test_pipe4_burst_to_full_upgrade() {
5917        // LTP should upgrade from Burst to Full after 10 activations
5918        let mut edge = create_test_edge_with_tier(0.22, 0, EdgeTier::L2Episodic);
5919
5920        // Get to burst LTP (5 activations)
5921        for _ in 0..5 {
5922            let _ = edge.strengthen();
5923        }
5924        assert!(
5925            matches!(edge.ltp_status, LtpStatus::Burst { .. }),
5926            "Should have Burst after 5 activations, got {:?}",
5927            edge.ltp_status
5928        );
5929
5930        // Continue strengthening to Full LTP (10 total)
5931        for _ in 0..5 {
5932            let _ = edge.strengthen();
5933        }
5934
5935        // Should now be Full (upgraded from Burst via 10 activations)
5936        assert!(
5937            matches!(edge.ltp_status, LtpStatus::Full),
5938            "Should have upgraded to Full LTP after 10 activations"
5939        );
5940    }
5941
5942    #[test]
5943    fn test_pipe4_activations_in_window() {
5944        let mut edge = create_test_edge_with_tier(0.22, 0, EdgeTier::L2Episodic);
5945
5946        // Record some activations
5947        for _ in 0..5 {
5948            let _ = edge.strengthen();
5949        }
5950
5951        let now = Utc::now();
5952        let hour_ago = now - chrono::Duration::hours(1);
5953        let day_ago = now - chrono::Duration::days(1);
5954
5955        // All activations are recent (within last second really)
5956        let in_hour = edge.activations_in_window(hour_ago, now);
5957        let in_day = edge.activations_in_window(day_ago, now);
5958        assert!(in_hour >= 5, "Expected 5+ in hour window, got {in_hour}");
5959        assert!(in_day >= 5, "Expected 5+ in day window, got {in_day}");
5960    }
5961
5962    // =========================================================================
5963    // PIPE-5: Unified LTP Readiness Model Tests
5964    // =========================================================================
5965
5966    #[test]
5967    fn test_pipe5_adjusted_threshold_default() {
5968        // Default confidence (None → 0.5) should give default threshold (10)
5969        let edge = create_test_edge_with_tier(0.5, 0, EdgeTier::L2Episodic);
5970        assert!(edge.entity_confidence.is_none());
5971
5972        let threshold = edge.adjusted_threshold();
5973        // confidence 0.5 → threshold = 13 - (0.5 * 6) = 10
5974        assert_eq!(threshold, 10, "Default confidence should give threshold 10");
5975    }
5976
5977    #[test]
5978    fn test_pipe5_adjusted_threshold_high_confidence() {
5979        // High confidence (0.9) should give lower threshold (7-8)
5980        let mut edge = create_test_edge_with_tier(0.5, 0, EdgeTier::L2Episodic);
5981        edge.entity_confidence = Some(0.9);
5982
5983        let threshold = edge.adjusted_threshold();
5984        // confidence 0.9 → threshold = 13 - (0.9 * 6) = 7.6 → 8
5985        assert!(
5986            threshold <= 8,
5987            "High confidence should give threshold <= 8, got {threshold}"
5988        );
5989    }
5990
5991    #[test]
5992    fn test_pipe5_adjusted_threshold_low_confidence() {
5993        // Low confidence (0.2) should give higher threshold (12-13)
5994        let mut edge = create_test_edge_with_tier(0.5, 0, EdgeTier::L2Episodic);
5995        edge.entity_confidence = Some(0.2);
5996
5997        let threshold = edge.adjusted_threshold();
5998        // confidence 0.2 → threshold = 13 - (0.2 * 6) = 11.8 → 12
5999        assert!(
6000            threshold >= 11,
6001            "Low confidence should give threshold >= 11, got {threshold}"
6002        );
6003    }
6004
6005    #[test]
6006    fn test_pipe5_strength_floor_by_tier() {
6007        use crate::constants::*;
6008
6009        let l1_edge = create_test_edge_with_tier(0.5, 0, EdgeTier::L1Working);
6010        let l2_edge = create_test_edge_with_tier(0.5, 0, EdgeTier::L2Episodic);
6011        let l3_edge = create_test_edge_with_tier(0.5, 0, EdgeTier::L3Semantic);
6012
6013        assert_eq!(
6014            l1_edge.strength_floor(),
6015            1.0,
6016            "L1 should have floor 1.0 (impossible)"
6017        );
6018        assert_eq!(
6019            l2_edge.strength_floor(),
6020            LTP_STRENGTH_FLOOR_L2,
6021            "L2 floor mismatch"
6022        );
6023        assert_eq!(
6024            l3_edge.strength_floor(),
6025            LTP_STRENGTH_FLOOR_L3,
6026            "L3 floor mismatch"
6027        );
6028    }
6029
6030    #[test]
6031    fn test_pipe5_ltp_readiness_l1_returns_zero() {
6032        // L1 edges should always return 0 readiness (can't reach Full LTP)
6033        let mut edge = create_test_edge_with_tier(0.99, 0, EdgeTier::L1Working);
6034        edge.activation_count = 100;
6035        edge.entity_confidence = Some(1.0);
6036
6037        assert_eq!(
6038            edge.ltp_readiness(),
6039            0.0,
6040            "L1 edges should always return 0 readiness"
6041        );
6042    }
6043
6044    #[test]
6045    fn test_pipe5_ltp_readiness_balanced_path() {
6046        use crate::constants::*;
6047
6048        // Balanced: 10 activations + 0.75 strength + 0.5 confidence
6049        // count_score = 10 / 10 = 1.0
6050        // strength_score = 0.75 / 0.65 = 1.15
6051        // tag_bonus = 0.5 * 0.3 = 0.15
6052        // readiness = 1.0 * 0.5 + 1.15 * 0.5 + 0.15 = 0.5 + 0.575 + 0.15 = 1.225
6053        let mut edge = create_test_edge_with_tier(0.75, 0, EdgeTier::L2Episodic);
6054        edge.activation_count = 10;
6055        edge.entity_confidence = Some(0.5);
6056
6057        let readiness = edge.ltp_readiness();
6058        assert!(
6059            readiness >= LTP_READINESS_THRESHOLD,
6060            "Balanced path should reach LTP, readiness = {}",
6061            readiness
6062        );
6063    }
6064
6065    #[test]
6066    fn test_pipe5_ltp_readiness_repetition_dominant() {
6067        use crate::constants::*;
6068
6069        // Repetition dominant: 15 activations + 0.50 strength (below floor)
6070        // count_score = 15 / 10 = 1.5
6071        // strength_score = 0.50 / 0.65 = 0.77
6072        // tag_bonus = 0.5 * 0.3 = 0.15
6073        // readiness = 1.5 * 0.5 + 0.77 * 0.5 + 0.15 = 0.75 + 0.385 + 0.15 = 1.285
6074        let mut edge = create_test_edge_with_tier(0.50, 0, EdgeTier::L2Episodic);
6075        edge.activation_count = 15;
6076        edge.entity_confidence = Some(0.5);
6077
6078        let readiness = edge.ltp_readiness();
6079        assert!(
6080            readiness >= LTP_READINESS_THRESHOLD,
6081            "Repetition-dominant path should reach LTP, readiness = {}",
6082            readiness
6083        );
6084    }
6085
6086    #[test]
6087    fn test_pipe5_ltp_readiness_intensity_dominant() {
6088        use crate::constants::*;
6089
6090        // Intensity dominant: 5 activations + 0.95 strength (L3)
6091        // count_score = 5 / 10 = 0.5
6092        // strength_score = 0.95 / 0.80 = 1.1875
6093        // tag_bonus = 0.5 * 0.3 = 0.15
6094        // readiness = 0.5 * 0.5 + 1.1875 * 0.5 + 0.15 = 0.25 + 0.59 + 0.15 = 0.99
6095        // Need more strength or count for intensity-only path on L3
6096        let mut edge = create_test_edge_with_tier(0.99, 0, EdgeTier::L3Semantic);
6097        edge.activation_count = 6;
6098        edge.entity_confidence = Some(0.5);
6099
6100        let readiness = edge.ltp_readiness();
6101        // count_score = 6/10 = 0.6, strength_score = 0.99/0.80 = 1.24
6102        // readiness = 0.6*0.5 + 1.24*0.5 + 0.15 = 0.3 + 0.62 + 0.15 = 1.07
6103        assert!(
6104            readiness >= LTP_READINESS_THRESHOLD,
6105            "Intensity-dominant path should reach LTP, readiness = {}",
6106            readiness
6107        );
6108    }
6109
6110    #[test]
6111    fn test_pipe5_ltp_readiness_high_confidence_boost() {
6112        use crate::constants::*;
6113
6114        // High confidence edge reaches LTP faster
6115        // 7 activations + 0.65 strength + 0.9 confidence
6116        // threshold = 13 - 0.9*6 = 7.6 → 8
6117        // count_score = 7 / 8 = 0.875
6118        // strength_score = 0.65 / 0.65 = 1.0
6119        // tag_bonus = 0.9 * 0.3 = 0.27
6120        // readiness = 0.875 * 0.5 + 1.0 * 0.5 + 0.27 = 0.44 + 0.5 + 0.27 = 1.21
6121        let mut edge = create_test_edge_with_tier(0.65, 0, EdgeTier::L2Episodic);
6122        edge.activation_count = 7;
6123        edge.entity_confidence = Some(0.9);
6124
6125        let readiness = edge.ltp_readiness();
6126        assert!(
6127            readiness >= LTP_READINESS_THRESHOLD,
6128            "High-confidence should boost to LTP, readiness = {}",
6129            readiness
6130        );
6131    }
6132
6133    #[test]
6134    fn test_pipe5_weak_edge_no_ltp() {
6135        use crate::constants::*;
6136
6137        // Weak edge: 4 activations + 0.40 strength + 0.3 confidence
6138        // threshold = 13 - 0.3*6 = 11.2 → 11
6139        // count_score = 4 / 11 = 0.36
6140        // strength_score = 0.40 / 0.65 = 0.62
6141        // tag_bonus = 0.3 * 0.3 = 0.09
6142        // readiness = 0.36 * 0.5 + 0.62 * 0.5 + 0.09 = 0.18 + 0.31 + 0.09 = 0.58
6143        let mut edge = create_test_edge_with_tier(0.40, 0, EdgeTier::L2Episodic);
6144        edge.activation_count = 4;
6145        edge.entity_confidence = Some(0.3);
6146
6147        let readiness = edge.ltp_readiness();
6148        assert!(
6149            readiness < LTP_READINESS_THRESHOLD,
6150            "Weak edge should NOT reach LTP, readiness = {}",
6151            readiness
6152        );
6153    }
6154
6155    #[test]
6156    fn test_pipe5_unified_detect_ltp_status() {
6157        // Test that detect_ltp_status uses the unified readiness formula
6158        let mut edge = create_test_edge_with_tier(0.75, 0, EdgeTier::L2Episodic);
6159        edge.activation_count = 10;
6160        edge.entity_confidence = Some(0.5);
6161        edge.activation_timestamps = Some(std::collections::VecDeque::new());
6162
6163        let status = edge.detect_ltp_status(Utc::now());
6164        assert_eq!(
6165            status,
6166            LtpStatus::Full,
6167            "Balanced path should grant Full LTP via readiness"
6168        );
6169    }
6170
6171    #[test]
6172    fn test_pipe5_l3_no_auto_ltp_without_activations() {
6173        // L3 with high strength but low activation count should NOT auto-LTP
6174        // This tests that the old auto-LTP behavior is removed
6175        let mut edge = create_test_edge_with_tier(0.85, 0, EdgeTier::L3Semantic);
6176        edge.activation_count = 2; // Low count
6177        edge.entity_confidence = Some(0.5);
6178        edge.activation_timestamps = Some(std::collections::VecDeque::new());
6179
6180        // count_score = 2/10 = 0.2, strength_score = 0.85/0.80 = 1.06
6181        // readiness = 0.2*0.5 + 1.06*0.5 + 0.15 = 0.1 + 0.53 + 0.15 = 0.78
6182        let status = edge.detect_ltp_status(Utc::now());
6183        assert_eq!(
6184            status,
6185            LtpStatus::None,
6186            "L3 high strength alone should NOT grant Full LTP, needs activations too"
6187        );
6188    }
6189
6190    #[test]
6191    fn test_decay_reduces_strength() {
6192        // Use L2 tier for multi-day decay testing (L1 max age is only 4 hours)
6193        let mut edge = create_test_edge_with_tier(0.5, 7, EdgeTier::L2Episodic);
6194
6195        let initial_strength = edge.strength;
6196        edge.decay();
6197
6198        assert!(
6199            edge.strength < initial_strength,
6200            "Decay should reduce strength (initial: {}, after: {})",
6201            initial_strength,
6202            edge.strength
6203        );
6204    }
6205
6206    #[test]
6207    fn test_decay_tier_aware() {
6208        // Test tier-aware decay: L2 episodic with exponential decay (λ=0.031/day) over 7 days
6209        // Expected: e^(-0.031 * 7) ≈ 0.805, so strength decays to ~80%
6210        let mut edge = create_test_edge_with_tier(1.0, 7, EdgeTier::L2Episodic);
6211
6212        edge.decay();
6213
6214        // After 7 days with L2 exponential decay, expect moderate reduction (~80% retained)
6215        // but well above floor since within max age
6216        assert!(
6217            edge.strength < 0.85,
6218            "After 7 days with L2 decay, strength should be below 0.85, got {}",
6219            edge.strength
6220        );
6221        assert!(
6222            edge.strength > 0.75,
6223            "After 7 days with L2 decay, strength should be above 0.75, got {}",
6224            edge.strength
6225        );
6226        assert!(
6227            edge.strength > LTP_MIN_STRENGTH,
6228            "Strength should still be above floor, got {}",
6229            edge.strength
6230        );
6231    }
6232
6233    #[test]
6234    fn test_decay_minimum_floor() {
6235        // Use L3 tier for very old edge testing (L3 has 10 year max age)
6236        let mut edge = create_test_edge_with_tier(0.02, 100, EdgeTier::L3Semantic);
6237
6238        edge.decay();
6239
6240        assert!(
6241            edge.strength >= LTP_MIN_STRENGTH,
6242            "Strength should not go below minimum floor"
6243        );
6244    }
6245
6246    #[test]
6247    fn test_potentiated_decay_slower() {
6248        // Use L2 tier for multi-day decay comparison
6249        let mut edge1 = create_test_edge_with_tier(0.8, 7, EdgeTier::L2Episodic);
6250        let mut edge2 = create_test_edge_with_tier(0.8, 7, EdgeTier::L2Episodic);
6251        edge2.ltp_status = LtpStatus::Full; // Full LTP = 10x slower decay
6252
6253        edge1.decay();
6254        edge2.decay();
6255
6256        assert!(
6257            edge2.strength > edge1.strength,
6258            "Potentiated edge should decay slower (normal: {}, potentiated: {})",
6259            edge1.strength,
6260            edge2.strength
6261        );
6262    }
6263
6264    #[test]
6265    fn test_effective_strength_read_only() {
6266        // Use L2 tier for multi-day testing
6267        let edge = create_test_edge_with_tier(0.5, 7, EdgeTier::L2Episodic);
6268        let initial_strength = edge.strength;
6269
6270        let effective = edge.effective_strength();
6271
6272        // effective_strength should not modify the edge
6273        assert_eq!(edge.strength, initial_strength);
6274        assert!(effective < initial_strength);
6275    }
6276
6277    #[test]
6278    fn test_decay_prune_threshold() {
6279        // Use L2 tier for decay testing beyond its max age (14 days)
6280        let mut weak_edge = create_test_edge_with_tier(LTP_MIN_STRENGTH, 30, EdgeTier::L2Episodic);
6281        // No LTP status = normal decay
6282        assert!(matches!(weak_edge.ltp_status, LtpStatus::None));
6283
6284        let should_prune = weak_edge.decay();
6285
6286        // Non-potentiated edge at minimum strength past max age should be prunable
6287        assert!(
6288            should_prune,
6289            "Weak non-potentiated edge past max age should be marked for pruning"
6290        );
6291    }
6292
6293    #[test]
6294    fn test_potentiated_above_floor_never_pruned() {
6295        // Potentiated edge above LTP_PRUNE_FLOOR should be protected
6296        let mut edge = create_test_edge_with_tier(0.1, 30, EdgeTier::L2Episodic);
6297        edge.ltp_status = LtpStatus::Full;
6298
6299        let should_prune = edge.decay();
6300
6301        assert!(
6302            !should_prune,
6303            "Potentiated edges above LTP_PRUNE_FLOOR should not be pruned"
6304        );
6305    }
6306
6307    #[test]
6308    fn test_potentiated_at_floor_stripped_and_prunable() {
6309        // Potentiated edge at/below LTP_PRUNE_FLOOR should have LTP stripped
6310        let mut edge = create_test_edge_with_tier(LTP_MIN_STRENGTH, 30, EdgeTier::L2Episodic);
6311        edge.ltp_status = LtpStatus::Full;
6312
6313        let should_prune = edge.decay();
6314
6315        // LTP gets stripped because strength <= LTP_PRUNE_FLOOR,
6316        // then normal prune logic applies (strength at floor, past max age)
6317        assert!(
6318            should_prune,
6319            "Zombie potentiated edges at floor strength should be prunable"
6320        );
6321        assert!(
6322            matches!(edge.ltp_status, LtpStatus::None),
6323            "LTP status should be stripped when strength at floor"
6324        );
6325    }
6326
6327    #[test]
6328    fn test_salience_calculation() {
6329        let person_salience = EntityExtractor::calculate_base_salience(&EntityLabel::Person, false);
6330        let person_proper_salience =
6331            EntityExtractor::calculate_base_salience(&EntityLabel::Person, true);
6332
6333        assert_eq!(person_salience, 0.8);
6334        assert!((person_proper_salience - 0.96).abs() < 0.01); // 0.8 * 1.2 = 0.96
6335    }
6336
6337    #[test]
6338    fn test_salience_caps_at_one() {
6339        // Person (0.8) * 1.2 = 0.96, should not exceed 1.0
6340        let salience = EntityExtractor::calculate_base_salience(&EntityLabel::Person, true);
6341        assert!(salience <= 1.0);
6342    }
6343
6344    #[test]
6345    fn test_hebbian_strength_no_episode() {
6346        // Create a temporary graph memory for testing
6347        let temp_dir = tempfile::tempdir().unwrap();
6348        let graph = GraphMemory::new(temp_dir.path(), None).unwrap();
6349
6350        // Random memory ID with no associated episode should return 0.5 (neutral)
6351        let fake_memory_id = crate::memory::MemoryId(Uuid::new_v4());
6352        let strength = graph.get_memory_hebbian_strength(&fake_memory_id);
6353        assert_eq!(strength, Some(0.5), "No episode should return neutral 0.5");
6354    }
6355
6356    #[test]
6357    fn test_hebbian_strength_with_episode_no_edges() {
6358        let temp_dir = tempfile::tempdir().unwrap();
6359        let graph = GraphMemory::new(temp_dir.path(), None).unwrap();
6360
6361        // Create entities
6362        let entity1 = EntityNode {
6363            uuid: Uuid::new_v4(),
6364            name: "Entity1".to_string(),
6365            labels: vec![EntityLabel::Person],
6366            created_at: Utc::now(),
6367            last_seen_at: Utc::now(),
6368            mention_count: 1,
6369            summary: String::new(),
6370            attributes: std::collections::HashMap::new(),
6371            name_embedding: None,
6372            salience: 0.5,
6373            is_proper_noun: false,
6374        };
6375        let entity2 = EntityNode {
6376            uuid: Uuid::new_v4(),
6377            name: "Entity2".to_string(),
6378            labels: vec![EntityLabel::Organization],
6379            created_at: Utc::now(),
6380            last_seen_at: Utc::now(),
6381            mention_count: 1,
6382            summary: String::new(),
6383            attributes: std::collections::HashMap::new(),
6384            name_embedding: None,
6385            salience: 0.5,
6386            is_proper_noun: false,
6387        };
6388
6389        let entity1_uuid = graph.add_entity(entity1.clone()).unwrap();
6390        let entity2_uuid = graph.add_entity(entity2.clone()).unwrap();
6391
6392        // Create episode with entities but no edges
6393        let memory_id = crate::memory::MemoryId(Uuid::new_v4());
6394        let episode = EpisodicNode {
6395            uuid: memory_id.0,
6396            name: "Test Episode".to_string(),
6397            content: "Test content".to_string(),
6398            valid_at: Utc::now(),
6399            created_at: Utc::now(),
6400            entity_refs: vec![entity1_uuid, entity2_uuid],
6401            source: EpisodeSource::Message,
6402            metadata: std::collections::HashMap::new(),
6403        };
6404        graph.add_episode(episode).unwrap();
6405
6406        // Episode with entities but no edges should return 0.5
6407        let strength = graph.get_memory_hebbian_strength(&memory_id);
6408        assert_eq!(
6409            strength,
6410            Some(0.5),
6411            "Episode without edges should return neutral 0.5"
6412        );
6413    }
6414
6415    #[test]
6416    fn test_hebbian_strength_with_edges() {
6417        let temp_dir = tempfile::tempdir().unwrap();
6418        let graph = GraphMemory::new(temp_dir.path(), None).unwrap();
6419
6420        // Create entities
6421        let entity1_uuid = Uuid::new_v4();
6422        let entity2_uuid = Uuid::new_v4();
6423
6424        let entity1 = EntityNode {
6425            uuid: entity1_uuid,
6426            name: "Entity1".to_string(),
6427            labels: vec![EntityLabel::Person],
6428            created_at: Utc::now(),
6429            last_seen_at: Utc::now(),
6430            mention_count: 1,
6431            summary: String::new(),
6432            attributes: std::collections::HashMap::new(),
6433            name_embedding: None,
6434            salience: 0.5,
6435            is_proper_noun: false,
6436        };
6437        let entity2 = EntityNode {
6438            uuid: entity2_uuid,
6439            name: "Entity2".to_string(),
6440            labels: vec![EntityLabel::Organization],
6441            created_at: Utc::now(),
6442            last_seen_at: Utc::now(),
6443            mention_count: 1,
6444            summary: String::new(),
6445            attributes: std::collections::HashMap::new(),
6446            name_embedding: None,
6447            salience: 0.5,
6448            is_proper_noun: false,
6449        };
6450
6451        graph.add_entity(entity1).unwrap();
6452        graph.add_entity(entity2).unwrap();
6453
6454        // Create episode
6455        let memory_id = crate::memory::MemoryId(Uuid::new_v4());
6456        let episode = EpisodicNode {
6457            uuid: memory_id.0,
6458            name: "Test Episode".to_string(),
6459            content: "Test content".to_string(),
6460            valid_at: Utc::now(),
6461            created_at: Utc::now(),
6462            entity_refs: vec![entity1_uuid, entity2_uuid],
6463            source: EpisodeSource::Message,
6464            metadata: std::collections::HashMap::new(),
6465        };
6466        graph.add_episode(episode).unwrap();
6467
6468        // Create edge between entities with known strength
6469        let edge = RelationshipEdge {
6470            uuid: Uuid::new_v4(),
6471            from_entity: entity1_uuid,
6472            to_entity: entity2_uuid,
6473            relation_type: RelationType::RelatedTo,
6474            strength: 0.8,
6475            created_at: Utc::now(),
6476            valid_at: Utc::now(),
6477            invalidated_at: None,
6478            source_episode_id: Some(memory_id.0),
6479            context: "Test context".to_string(),
6480            last_activated: Utc::now(), // Just activated - no decay
6481            activation_count: 5,
6482            ltp_status: LtpStatus::None,
6483            activation_timestamps: None,
6484            tier: EdgeTier::L2Episodic, // Use L2 since it has activation count
6485            entity_confidence: None,    // PIPE-5: Default for tests
6486        };
6487        graph.add_relationship(edge).unwrap();
6488
6489        // Should return the edge strength (0.8, with minimal decay since just activated)
6490        let strength = graph.get_memory_hebbian_strength(&memory_id);
6491        assert!(strength.is_some());
6492        let s = strength.unwrap();
6493        assert!(s > 0.75 && s <= 0.8, "Strength should be ~0.8, got {}", s);
6494    }
6495}