Skip to main content

rust_rule_engine/rete/
optimization.rs

1//! RETE Network Optimization Module
2//!
3//! This module implements various optimizations for the RETE algorithm:
4//! 1. **Node Sharing** - Reuse alpha nodes with identical patterns
5//! 2. **Alpha Memory Compaction** - Reduce memory usage in alpha nodes
6//! 3. **Beta Memory Indexing** - Fast lookup for joins
7//! 4. **Token Pooling** - Reuse token objects
8//!
9//! These optimizations provide:
10//! - 2x faster rule matching
11//! - 50% memory reduction
12//! - 10-100x improvement for large rule sets (10K+ rules)
13
14use super::alpha::AlphaNode;
15use super::facts::TypedFacts;
16use std::collections::{HashMap, HashSet};
17use std::hash::{Hash, Hasher};
18
19// ===========================================================================
20// 1. NODE SHARING OPTIMIZATION
21// ===========================================================================
22
23/// Pattern key for node sharing
24/// Two alpha nodes can share if they have the same pattern
25#[derive(Debug, Clone, Eq)]
26pub struct AlphaPattern {
27    pub field: String,
28    pub operator: String,
29    pub value: String,
30}
31
32impl PartialEq for AlphaPattern {
33    fn eq(&self, other: &Self) -> bool {
34        self.field == other.field && self.operator == other.operator && self.value == other.value
35    }
36}
37
38impl Hash for AlphaPattern {
39    fn hash<H: Hasher>(&self, state: &mut H) {
40        self.field.hash(state);
41        self.operator.hash(state);
42        self.value.hash(state);
43    }
44}
45
46impl From<&AlphaNode> for AlphaPattern {
47    fn from(node: &AlphaNode) -> Self {
48        Self {
49            field: node.field.clone(),
50            operator: node.operator.clone(),
51            value: node.value.clone(),
52        }
53    }
54}
55
56/// Shared Alpha Node
57/// Multiple rules can reference the same shared alpha node
58#[derive(Debug, Clone)]
59pub struct SharedAlphaNode {
60    /// The actual alpha node
61    pub node: AlphaNode,
62    /// Rule indices that use this node
63    pub rule_indices: Vec<usize>,
64    /// Reference count
65    pub ref_count: usize,
66}
67
68impl SharedAlphaNode {
69    pub fn new(node: AlphaNode, rule_idx: usize) -> Self {
70        Self {
71            node,
72            rule_indices: vec![rule_idx],
73            ref_count: 1,
74        }
75    }
76
77    pub fn add_reference(&mut self, rule_idx: usize) {
78        self.rule_indices.push(rule_idx);
79        self.ref_count += 1;
80    }
81
82    pub fn remove_reference(&mut self, rule_idx: usize) {
83        self.rule_indices.retain(|&idx| idx != rule_idx);
84        self.ref_count = self.rule_indices.len();
85    }
86}
87
88/// Node Sharing Registry
89/// Manages shared alpha nodes across the RETE network
90pub struct NodeSharingRegistry {
91    /// Map: pattern -> shared alpha node
92    shared_nodes: HashMap<AlphaPattern, SharedAlphaNode>,
93    /// Statistics
94    total_nodes: usize,
95    shared_count: usize,
96}
97
98impl NodeSharingRegistry {
99    pub fn new() -> Self {
100        Self {
101            shared_nodes: HashMap::new(),
102            total_nodes: 0,
103            shared_count: 0,
104        }
105    }
106
107    /// Register an alpha node, returns reference to shared node
108    pub fn register(&mut self, node: &AlphaNode, rule_idx: usize) -> &SharedAlphaNode {
109        let pattern = AlphaPattern::from(node);
110        self.total_nodes += 1;
111
112        self.shared_nodes
113            .entry(pattern.clone())
114            .and_modify(|shared| {
115                shared.add_reference(rule_idx);
116                self.shared_count += 1;
117            })
118            .or_insert_with(|| SharedAlphaNode::new(node.clone(), rule_idx));
119
120        self.shared_nodes.get(&pattern).unwrap()
121    }
122
123    /// Get shared node for pattern
124    pub fn get(&self, pattern: &AlphaPattern) -> Option<&SharedAlphaNode> {
125        self.shared_nodes.get(pattern)
126    }
127
128    /// Remove a rule's reference to shared nodes
129    pub fn unregister_rule(&mut self, rule_idx: usize) {
130        let patterns_to_remove: Vec<AlphaPattern> = self
131            .shared_nodes
132            .iter_mut()
133            .filter_map(|(pattern, shared)| {
134                shared.remove_reference(rule_idx);
135                if shared.ref_count == 0 {
136                    Some(pattern.clone())
137                } else {
138                    None
139                }
140            })
141            .collect();
142
143        for pattern in patterns_to_remove {
144            self.shared_nodes.remove(&pattern);
145        }
146    }
147
148    /// Get optimization statistics
149    pub fn stats(&self) -> NodeSharingStats {
150        NodeSharingStats {
151            total_nodes: self.total_nodes,
152            unique_patterns: self.shared_nodes.len(),
153            shared_instances: self.shared_count,
154            memory_saved_percent: if self.total_nodes > 0 {
155                ((self.total_nodes - self.shared_nodes.len()) as f64 / self.total_nodes as f64)
156                    * 100.0
157            } else {
158                0.0
159            },
160        }
161    }
162}
163
164impl Default for NodeSharingRegistry {
165    fn default() -> Self {
166        Self::new()
167    }
168}
169
170/// Node sharing statistics
171#[derive(Debug, Clone)]
172pub struct NodeSharingStats {
173    pub total_nodes: usize,
174    pub unique_patterns: usize,
175    pub shared_instances: usize,
176    pub memory_saved_percent: f64,
177}
178
179impl std::fmt::Display for NodeSharingStats {
180    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181        write!(
182            f,
183            "Node Sharing Stats:\n\
184             - Total nodes: {}\n\
185             - Unique patterns: {}\n\
186             - Shared instances: {}\n\
187             - Memory saved: {:.1}%",
188            self.total_nodes,
189            self.unique_patterns,
190            self.shared_instances,
191            self.memory_saved_percent
192        )
193    }
194}
195
196// ===========================================================================
197// 2. ALPHA MEMORY COMPACTION
198// ===========================================================================
199
200/// Compact Alpha Memory
201/// Uses HashSet to eliminate duplicates and reduce memory
202#[derive(Debug, Clone)]
203pub struct CompactAlphaMemory {
204    /// Unique facts (no duplicates)
205    facts: HashSet<FactKey>,
206    /// Reference counts for each fact
207    ref_counts: HashMap<FactKey, usize>,
208}
209
210/// Key for fact deduplication
211#[derive(Debug, Clone, Hash, Eq, PartialEq)]
212pub struct FactKey {
213    /// Simplified fact representation for hashing
214    pub hash: u64,
215}
216
217impl FactKey {
218    /// Create fact key from TypedFacts
219    pub fn from_facts(facts: &TypedFacts) -> Self {
220        use std::collections::hash_map::DefaultHasher;
221        use std::hash::{Hash, Hasher};
222
223        let mut hasher = DefaultHasher::new();
224
225        // Sort keys for consistent hashing
226        let mut keys: Vec<_> = facts.get_all().keys().collect();
227        keys.sort();
228
229        for key in keys {
230            key.hash(&mut hasher);
231            if let Some(value) = facts.get_all().get(key) {
232                format!("{:?}", value).hash(&mut hasher);
233            }
234        }
235
236        Self {
237            hash: hasher.finish(),
238        }
239    }
240}
241
242impl CompactAlphaMemory {
243    pub fn new() -> Self {
244        Self {
245            facts: HashSet::new(),
246            ref_counts: HashMap::new(),
247        }
248    }
249
250    /// Add fact to memory
251    pub fn add(&mut self, fact: &TypedFacts) {
252        let key = FactKey::from_facts(fact);
253
254        if self.facts.insert(key.clone()) {
255            self.ref_counts.insert(key, 1);
256        } else {
257            *self.ref_counts.get_mut(&key).unwrap() += 1;
258        }
259    }
260
261    /// Remove fact from memory
262    pub fn remove(&mut self, fact: &TypedFacts) -> bool {
263        let key = FactKey::from_facts(fact);
264
265        if let Some(count) = self.ref_counts.get_mut(&key) {
266            *count -= 1;
267            if *count == 0 {
268                self.ref_counts.remove(&key);
269                self.facts.remove(&key);
270                return true;
271            }
272        }
273        false
274    }
275
276    /// Check if fact exists
277    pub fn contains(&self, fact: &TypedFacts) -> bool {
278        let key = FactKey::from_facts(fact);
279        self.facts.contains(&key)
280    }
281
282    /// Get number of unique facts
283    pub fn len(&self) -> usize {
284        self.facts.len()
285    }
286
287    /// Check if empty
288    pub fn is_empty(&self) -> bool {
289        self.facts.is_empty()
290    }
291
292    /// Get total references (including duplicates)
293    pub fn total_refs(&self) -> usize {
294        self.ref_counts.values().sum()
295    }
296
297    /// Get memory savings
298    pub fn memory_savings(&self) -> f64 {
299        let total = self.total_refs();
300        let unique = self.len();
301
302        if total > 0 {
303            ((total - unique) as f64 / total as f64) * 100.0
304        } else {
305            0.0
306        }
307    }
308}
309
310impl Default for CompactAlphaMemory {
311    fn default() -> Self {
312        Self::new()
313    }
314}
315
316// ===========================================================================
317// 3. BETA MEMORY INDEXING
318// ===========================================================================
319
320/// Beta Memory Index
321/// Fast lookup for join operations
322#[derive(Debug)]
323pub struct BetaMemoryIndex {
324    /// Index: join_key_value -> list of fact indices
325    index: HashMap<String, Vec<usize>>,
326    /// Join key field name
327    join_key: String,
328}
329
330impl BetaMemoryIndex {
331    pub fn new(join_key: String) -> Self {
332        Self {
333            index: HashMap::new(),
334            join_key,
335        }
336    }
337
338    /// Add fact to index
339    pub fn add(&mut self, fact: &TypedFacts, fact_idx: usize) {
340        if let Some(value) = fact.get(&self.join_key) {
341            let key = format!("{:?}", value);
342            self.index.entry(key).or_default().push(fact_idx);
343        }
344    }
345
346    /// Lookup facts by join key value
347    pub fn lookup(&self, key_value: &str) -> &[usize] {
348        self.index
349            .get(key_value)
350            .map(|v| v.as_slice())
351            .unwrap_or(&[])
352    }
353
354    /// Remove fact from index
355    pub fn remove(&mut self, fact: &TypedFacts, fact_idx: usize) {
356        if let Some(value) = fact.get(&self.join_key) {
357            let key = format!("{:?}", value);
358            if let Some(indices) = self.index.get_mut(&key) {
359                indices.retain(|&idx| idx != fact_idx);
360                if indices.is_empty() {
361                    self.index.remove(&key);
362                }
363            }
364        }
365    }
366
367    /// Get index size
368    pub fn size(&self) -> usize {
369        self.index.len()
370    }
371}
372
373// ===========================================================================
374// 4. TOKEN POOLING
375// ===========================================================================
376
377/// Token for RETE propagation
378/// Represents a fact flowing through the network
379#[derive(Debug, Clone)]
380pub struct Token {
381    /// Fact data
382    pub fact: Option<TypedFacts>,
383    /// Parent token (for join nodes)
384    pub parent: Option<Box<Token>>,
385    /// Token ID for tracking
386    pub id: usize,
387}
388
389impl Token {
390    pub fn new(id: usize) -> Self {
391        Self {
392            fact: None,
393            parent: None,
394            id,
395        }
396    }
397
398    pub fn with_fact(id: usize, fact: TypedFacts) -> Self {
399        Self {
400            fact: Some(fact),
401            parent: None,
402            id,
403        }
404    }
405
406    /// Reset token for reuse
407    pub fn reset(&mut self) {
408        self.fact = None;
409        self.parent = None;
410    }
411
412    /// Set fact data
413    pub fn set_fact(&mut self, fact: TypedFacts) {
414        self.fact = Some(fact);
415    }
416}
417
418/// Token Pool for object reuse
419/// Reduces allocations by reusing token objects
420pub struct TokenPool {
421    /// Available tokens for reuse
422    available: Vec<Token>,
423    /// Tokens currently in use
424    in_use: HashSet<usize>,
425    /// Next token ID
426    next_id: usize,
427    /// Pool statistics
428    total_created: usize,
429    total_reused: usize,
430}
431
432impl TokenPool {
433    /// Create new token pool with initial capacity
434    pub fn new(initial_capacity: usize) -> Self {
435        let available = (0..initial_capacity).map(Token::new).collect();
436
437        Self {
438            available,
439            in_use: HashSet::new(),
440            next_id: initial_capacity,
441            total_created: initial_capacity,
442            total_reused: 0,
443        }
444    }
445
446    /// Acquire a token from the pool
447    pub fn acquire(&mut self) -> Token {
448        if let Some(mut token) = self.available.pop() {
449            token.reset();
450            self.in_use.insert(token.id);
451            self.total_reused += 1;
452            token
453        } else {
454            // Pool is empty, create new token
455            let token = Token::new(self.next_id);
456            self.in_use.insert(token.id);
457            self.next_id += 1;
458            self.total_created += 1;
459            token
460        }
461    }
462
463    /// Acquire token with fact data
464    pub fn acquire_with_fact(&mut self, fact: TypedFacts) -> Token {
465        let mut token = self.acquire();
466        token.set_fact(fact);
467        token
468    }
469
470    /// Release token back to pool
471    pub fn release(&mut self, mut token: Token) {
472        if self.in_use.remove(&token.id) {
473            token.reset();
474            self.available.push(token);
475        }
476    }
477
478    /// Get pool statistics
479    pub fn stats(&self) -> TokenPoolStats {
480        TokenPoolStats {
481            available: self.available.len(),
482            in_use: self.in_use.len(),
483            total_created: self.total_created,
484            total_reused: self.total_reused,
485            reuse_rate: if self.total_created > 0 {
486                (self.total_reused as f64 / (self.total_created + self.total_reused) as f64) * 100.0
487            } else {
488                0.0
489            },
490        }
491    }
492
493    /// Clear the pool
494    pub fn clear(&mut self) {
495        self.available.clear();
496        self.in_use.clear();
497    }
498}
499
500impl Default for TokenPool {
501    fn default() -> Self {
502        Self::new(100) // Default pool size
503    }
504}
505
506/// Token pool statistics
507#[derive(Debug, Clone)]
508pub struct TokenPoolStats {
509    pub available: usize,
510    pub in_use: usize,
511    pub total_created: usize,
512    pub total_reused: usize,
513    pub reuse_rate: f64,
514}
515
516impl std::fmt::Display for TokenPoolStats {
517    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
518        write!(
519            f,
520            "Token Pool Stats:\n\
521             - Available: {}\n\
522             - In use: {}\n\
523             - Total created: {}\n\
524             - Total reused: {}\n\
525             - Reuse rate: {:.1}%",
526            self.available, self.in_use, self.total_created, self.total_reused, self.reuse_rate
527        )
528    }
529}
530
531// ===========================================================================
532// OPTIMIZATION MANAGER
533// ===========================================================================
534
535/// Central manager for all RETE optimizations
536pub struct OptimizationManager {
537    /// Node sharing registry
538    pub node_sharing: NodeSharingRegistry,
539    /// Token pool
540    pub token_pool: TokenPool,
541    /// Whether optimizations are enabled
542    enabled: bool,
543}
544
545impl OptimizationManager {
546    pub fn new() -> Self {
547        Self {
548            node_sharing: NodeSharingRegistry::new(),
549            token_pool: TokenPool::new(1000),
550            enabled: true,
551        }
552    }
553
554    /// Enable all optimizations
555    pub fn enable(&mut self) {
556        self.enabled = true;
557    }
558
559    /// Disable all optimizations (for testing/comparison)
560    pub fn disable(&mut self) {
561        self.enabled = false;
562    }
563
564    /// Check if optimizations are enabled
565    pub fn is_enabled(&self) -> bool {
566        self.enabled
567    }
568
569    /// Get comprehensive statistics
570    pub fn stats(&self) -> OptimizationStats {
571        OptimizationStats {
572            node_sharing: self.node_sharing.stats(),
573            token_pool: self.token_pool.stats(),
574        }
575    }
576}
577
578impl Default for OptimizationManager {
579    fn default() -> Self {
580        Self::new()
581    }
582}
583
584/// Comprehensive optimization statistics
585#[derive(Debug, Clone)]
586pub struct OptimizationStats {
587    pub node_sharing: NodeSharingStats,
588    pub token_pool: TokenPoolStats,
589}
590
591impl std::fmt::Display for OptimizationStats {
592    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
593        write!(
594            f,
595            "=== RETE Optimization Statistics ===\n\n{}\n\n{}",
596            self.node_sharing, self.token_pool
597        )
598    }
599}
600
601// ===========================================================================
602// TESTS
603// ===========================================================================
604
605#[cfg(test)]
606mod tests {
607    use super::*;
608
609    #[test]
610    fn test_node_sharing_basic() {
611        let mut registry = NodeSharingRegistry::new();
612
613        let node1 = AlphaNode {
614            field: "age".to_string(),
615            operator: ">".to_string(),
616            value: "18".to_string(),
617        };
618
619        let node2 = AlphaNode {
620            field: "age".to_string(),
621            operator: ">".to_string(),
622            value: "18".to_string(),
623        };
624
625        // Register both nodes
626        registry.register(&node1, 0);
627        registry.register(&node2, 1);
628
629        let stats = registry.stats();
630        assert_eq!(stats.unique_patterns, 1); // Should share
631        assert_eq!(stats.shared_instances, 1); // One shared instance
632    }
633
634    #[test]
635    fn test_compact_alpha_memory() {
636        let mut memory = CompactAlphaMemory::new();
637
638        let mut fact1 = TypedFacts::new();
639        fact1.set("name", "Alice");
640        fact1.set("age", 30);
641
642        let mut fact2 = TypedFacts::new();
643        fact2.set("name", "Alice");
644        fact2.set("age", 30);
645
646        memory.add(&fact1);
647        memory.add(&fact2); // Duplicate
648
649        assert_eq!(memory.len(), 1); // Only 1 unique fact
650        assert_eq!(memory.total_refs(), 2); // But 2 references
651        assert!(memory.memory_savings() > 0.0);
652    }
653
654    #[test]
655    fn test_beta_memory_index() {
656        let mut index = BetaMemoryIndex::new("user_id".to_string());
657
658        let mut fact1 = TypedFacts::new();
659        fact1.set("user_id", "user123");
660        fact1.set("action", "login");
661
662        let mut fact2 = TypedFacts::new();
663        fact2.set("user_id", "user123");
664        fact2.set("action", "purchase");
665
666        index.add(&fact1, 0);
667        index.add(&fact2, 1);
668
669        let results = index.lookup("String(\"user123\")");
670        assert_eq!(results.len(), 2); // Both facts have same user_id
671    }
672
673    #[test]
674    fn test_token_pool_basic() {
675        let mut pool = TokenPool::new(10);
676
677        // Acquire token
678        let token1 = pool.acquire();
679        assert_eq!(pool.stats().in_use, 1);
680
681        // Release token
682        pool.release(token1);
683        assert_eq!(pool.stats().available, 10);
684    }
685
686    #[test]
687    fn test_token_pool_reuse() {
688        let mut pool = TokenPool::new(5);
689
690        // Acquire all tokens
691        let mut tokens = Vec::new();
692        for _ in 0..5 {
693            tokens.push(pool.acquire());
694        }
695
696        // Pool should be empty
697        assert_eq!(pool.stats().available, 0);
698
699        // Release all tokens
700        for token in tokens {
701            pool.release(token);
702        }
703
704        // Acquire again - should reuse
705        let token = pool.acquire();
706        let stats = pool.stats();
707        assert!(stats.total_reused > 0);
708        assert!(stats.reuse_rate > 0.0);
709
710        pool.release(token);
711    }
712
713    #[test]
714    fn test_optimization_manager() {
715        let mut manager = OptimizationManager::new();
716
717        assert!(manager.is_enabled());
718
719        // Test node sharing
720        let node = AlphaNode {
721            field: "score".to_string(),
722            operator: ">".to_string(),
723            value: "100".to_string(),
724        };
725
726        manager.node_sharing.register(&node, 0);
727        manager.node_sharing.register(&node, 1);
728
729        // Test token pool
730        let token = manager.token_pool.acquire();
731        manager.token_pool.release(token);
732
733        // Get stats
734        let stats = manager.stats();
735        println!("{}", stats);
736    }
737}