Skip to main content

reddb_server/storage/engine/
store_strategy.rs

1//! Triple Store Strategies
2//!
3//! Pluggable indexing strategies for graph/triple storage optimization.
4//!
5//! # Strategies
6//!
7//! - **Eager**: Index on add (high write cost, fast reads)
8//! - **Lazy**: Index on first query (lazy materialization)
9//! - **Minimal**: Minimal indexes for memory-constrained environments
10//!
11//! # Pattern Classification
12//!
13//! Queries are classified by which components are bound:
14//! - `SUB_PRE_OBJ`: All bound (exact lookup)
15//! - `SUB_ANY_ANY`: Subject bound only
16//! - `ANY_PRE_OBJ`: Predicate and object bound
17//! - etc.
18//!
19//! # References
20//!
21//! - Jena TDB `PatternType` classification
22//! - Jena `StoreStrategy` for index selection
23
24use std::collections::HashMap;
25use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
26
27fn strategy_read<'a, T>(lock: &'a RwLock<T>) -> RwLockReadGuard<'a, T> {
28    lock.read().unwrap_or_else(|poisoned| poisoned.into_inner())
29}
30
31fn strategy_write<'a, T>(lock: &'a RwLock<T>) -> RwLockWriteGuard<'a, T> {
32    lock.write()
33        .unwrap_or_else(|poisoned| poisoned.into_inner())
34}
35
36/// Pattern type for triple queries
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
38pub enum PatternType {
39    /// Subject, Predicate, Object all bound (exact match)
40    SubPreObj,
41    /// Subject, Predicate bound
42    SubPre,
43    /// Subject, Object bound
44    SubObj,
45    /// Predicate, Object bound
46    PreObj,
47    /// Only Subject bound
48    Sub,
49    /// Only Predicate bound
50    Pre,
51    /// Only Object bound
52    Obj,
53    /// Nothing bound (full scan)
54    Any,
55}
56
57impl PatternType {
58    /// Create pattern type from bound flags
59    pub fn from_bounds(sub: bool, pre: bool, obj: bool) -> Self {
60        match (sub, pre, obj) {
61            (true, true, true) => PatternType::SubPreObj,
62            (true, true, false) => PatternType::SubPre,
63            (true, false, true) => PatternType::SubObj,
64            (false, true, true) => PatternType::PreObj,
65            (true, false, false) => PatternType::Sub,
66            (false, true, false) => PatternType::Pre,
67            (false, false, true) => PatternType::Obj,
68            (false, false, false) => PatternType::Any,
69        }
70    }
71
72    /// Get selectivity estimate (lower = more selective)
73    pub fn selectivity(&self) -> f64 {
74        match self {
75            PatternType::SubPreObj => 0.001, // Most selective
76            PatternType::SubPre => 0.01,
77            PatternType::SubObj => 0.02,
78            PatternType::PreObj => 0.03,
79            PatternType::Sub => 0.1,
80            PatternType::Pre => 0.3,
81            PatternType::Obj => 0.2,
82            PatternType::Any => 1.0, // Full scan
83        }
84    }
85
86    /// Get recommended index for this pattern
87    pub fn recommended_index(&self) -> IndexType {
88        match self {
89            PatternType::SubPreObj => IndexType::SPO,
90            PatternType::SubPre => IndexType::SPO,
91            PatternType::SubObj => IndexType::SOP,
92            PatternType::PreObj => IndexType::POS,
93            PatternType::Sub => IndexType::SPO,
94            PatternType::Pre => IndexType::POS,
95            PatternType::Obj => IndexType::OPS,
96            PatternType::Any => IndexType::SPO,
97        }
98    }
99}
100
101/// Index type (triple ordering)
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
103pub enum IndexType {
104    /// Subject-Predicate-Object (primary)
105    SPO,
106    /// Subject-Object-Predicate
107    SOP,
108    /// Predicate-Object-Subject
109    POS,
110    /// Predicate-Subject-Object
111    PSO,
112    /// Object-Predicate-Subject
113    OPS,
114    /// Object-Subject-Predicate
115    OSP,
116}
117
118impl IndexType {
119    /// All possible index orderings
120    pub fn all() -> &'static [IndexType] {
121        &[
122            IndexType::SPO,
123            IndexType::SOP,
124            IndexType::POS,
125            IndexType::PSO,
126            IndexType::OPS,
127            IndexType::OSP,
128        ]
129    }
130
131    /// Get key ordering for this index
132    pub fn key_order(&self) -> (usize, usize, usize) {
133        match self {
134            IndexType::SPO => (0, 1, 2),
135            IndexType::SOP => (0, 2, 1),
136            IndexType::POS => (1, 2, 0),
137            IndexType::PSO => (1, 0, 2),
138            IndexType::OPS => (2, 1, 0),
139            IndexType::OSP => (2, 0, 1),
140        }
141    }
142}
143
144/// Store strategy trait
145pub trait StoreStrategy: Send + Sync {
146    /// Get strategy name
147    fn name(&self) -> &str;
148
149    /// Get indexes to maintain
150    fn indexes(&self) -> &[IndexType];
151
152    /// Should index on add?
153    fn index_on_add(&self) -> bool;
154
155    /// Select best index for pattern
156    fn select_index(&self, pattern: PatternType) -> IndexType;
157
158    /// Get estimated cost for pattern with this strategy
159    fn estimate_cost(&self, pattern: PatternType) -> f64;
160}
161
162/// Eager indexing strategy - index on every add
163#[derive(Debug, Clone)]
164pub struct EagerStoreStrategy {
165    /// Indexes to maintain
166    indexes: Vec<IndexType>,
167}
168
169impl EagerStoreStrategy {
170    /// Create with default indexes (SPO, POS, OSP for coverage)
171    pub fn new() -> Self {
172        Self {
173            indexes: vec![IndexType::SPO, IndexType::POS, IndexType::OSP],
174        }
175    }
176
177    /// Create with full indexes (all 6 orderings)
178    pub fn full() -> Self {
179        Self {
180            indexes: IndexType::all().to_vec(),
181        }
182    }
183
184    /// Create with custom indexes
185    pub fn with_indexes(indexes: Vec<IndexType>) -> Self {
186        Self { indexes }
187    }
188}
189
190impl Default for EagerStoreStrategy {
191    fn default() -> Self {
192        Self::new()
193    }
194}
195
196impl StoreStrategy for EagerStoreStrategy {
197    fn name(&self) -> &str {
198        "Eager"
199    }
200
201    fn indexes(&self) -> &[IndexType] {
202        &self.indexes
203    }
204
205    fn index_on_add(&self) -> bool {
206        true
207    }
208
209    fn select_index(&self, pattern: PatternType) -> IndexType {
210        let preferred = pattern.recommended_index();
211        if self.indexes.contains(&preferred) {
212            preferred
213        } else {
214            // Fall back to first available
215            self.indexes.first().copied().unwrap_or(IndexType::SPO)
216        }
217    }
218
219    fn estimate_cost(&self, pattern: PatternType) -> f64 {
220        // Eager has optimal read cost
221        pattern.selectivity()
222    }
223}
224
225/// Lazy indexing strategy - index on first query
226#[derive(Debug)]
227pub struct LazyStoreStrategy {
228    /// Primary index (always available)
229    primary: IndexType,
230    /// Secondary indexes (built lazily)
231    secondary: RwLock<Vec<IndexType>>,
232    /// Indexes already materialized
233    materialized: RwLock<Vec<IndexType>>,
234}
235
236impl LazyStoreStrategy {
237    /// Create with SPO as primary
238    pub fn new() -> Self {
239        Self {
240            primary: IndexType::SPO,
241            secondary: RwLock::new(vec![
242                IndexType::POS,
243                IndexType::OSP,
244                IndexType::SOP,
245                IndexType::PSO,
246                IndexType::OPS,
247            ]),
248            materialized: RwLock::new(vec![IndexType::SPO]),
249        }
250    }
251
252    /// Check if index is materialized
253    pub fn is_materialized(&self, index: IndexType) -> bool {
254        strategy_read(&self.materialized).contains(&index)
255    }
256
257    /// Mark index as materialized
258    pub fn mark_materialized(&self, index: IndexType) {
259        let mut mat = strategy_write(&self.materialized);
260        if !mat.contains(&index) {
261            mat.push(index);
262        }
263    }
264}
265
266impl Default for LazyStoreStrategy {
267    fn default() -> Self {
268        Self::new()
269    }
270}
271
272impl StoreStrategy for LazyStoreStrategy {
273    fn name(&self) -> &str {
274        "Lazy"
275    }
276
277    fn indexes(&self) -> &[IndexType] {
278        // Only return currently materialized indexes
279        // Note: This returns a static slice, so we return primary only
280        std::slice::from_ref(&self.primary)
281    }
282
283    fn index_on_add(&self) -> bool {
284        false // Only index primary on add
285    }
286
287    fn select_index(&self, pattern: PatternType) -> IndexType {
288        let preferred = pattern.recommended_index();
289        if self.is_materialized(preferred) {
290            preferred
291        } else {
292            // Use primary, but note we should materialize preferred later
293            self.primary
294        }
295    }
296
297    fn estimate_cost(&self, pattern: PatternType) -> f64 {
298        let preferred = pattern.recommended_index();
299        if self.is_materialized(preferred) {
300            pattern.selectivity()
301        } else {
302            // Higher cost when using non-optimal index
303            pattern.selectivity() * 10.0
304        }
305    }
306}
307
308/// Minimal indexing strategy - memory-constrained
309#[derive(Debug, Clone)]
310pub struct MinimalStoreStrategy {
311    /// Single index to maintain
312    index: IndexType,
313}
314
315impl MinimalStoreStrategy {
316    /// Create with SPO index
317    pub fn new() -> Self {
318        Self {
319            index: IndexType::SPO,
320        }
321    }
322
323    /// Create with custom index
324    pub fn with_index(index: IndexType) -> Self {
325        Self { index }
326    }
327}
328
329impl Default for MinimalStoreStrategy {
330    fn default() -> Self {
331        Self::new()
332    }
333}
334
335impl StoreStrategy for MinimalStoreStrategy {
336    fn name(&self) -> &str {
337        "Minimal"
338    }
339
340    fn indexes(&self) -> &[IndexType] {
341        std::slice::from_ref(&self.index)
342    }
343
344    fn index_on_add(&self) -> bool {
345        true
346    }
347
348    fn select_index(&self, _pattern: PatternType) -> IndexType {
349        self.index // Always use the single index
350    }
351
352    fn estimate_cost(&self, pattern: PatternType) -> f64 {
353        // Higher cost for patterns that don't match our single index
354        let optimal = pattern.recommended_index();
355        if optimal == self.index {
356            pattern.selectivity()
357        } else {
358            // Scan penalty
359            pattern.selectivity() * 5.0
360        }
361    }
362}
363
364/// Index statistics
365#[derive(Debug, Clone, Default)]
366pub struct IndexStats {
367    /// Number of entries
368    pub entries: u64,
369    /// Number of lookups
370    pub lookups: u64,
371    /// Number of scans
372    pub scans: u64,
373    /// Number of inserts
374    pub inserts: u64,
375    /// Cache hits
376    pub cache_hits: u64,
377    /// Cache misses
378    pub cache_misses: u64,
379}
380
381impl IndexStats {
382    /// Calculate hit rate
383    pub fn hit_rate(&self) -> f64 {
384        let total = self.cache_hits + self.cache_misses;
385        if total == 0 {
386            0.0
387        } else {
388            self.cache_hits as f64 / total as f64
389        }
390    }
391}
392
393/// Triple index (generic for any ordering)
394pub struct TripleIndex {
395    /// Index type
396    index_type: IndexType,
397    /// Storage: key -> values
398    /// Key is composite of first two elements, value is third
399    data: RwLock<HashMap<(String, String), Vec<String>>>,
400    /// Statistics
401    stats: RwLock<IndexStats>,
402}
403
404impl TripleIndex {
405    /// Create new index
406    pub fn new(index_type: IndexType) -> Self {
407        Self {
408            index_type,
409            data: RwLock::new(HashMap::new()),
410            stats: RwLock::new(IndexStats::default()),
411        }
412    }
413
414    /// Insert triple
415    pub fn insert(&self, subject: &str, predicate: &str, object: &str) {
416        let (k1, k2, v) = self.order_triple(subject, predicate, object);
417        let key = (k1.to_string(), k2.to_string());
418
419        let mut data = strategy_write(&self.data);
420        data.entry(key).or_default().push(v.to_string());
421
422        let mut stats = strategy_write(&self.stats);
423        stats.inserts += 1;
424        stats.entries += 1;
425    }
426
427    /// Lookup with prefix
428    pub fn lookup(&self, first: &str, second: Option<&str>) -> Vec<(String, String, String)> {
429        let data = strategy_read(&self.data);
430        let mut results = Vec::new();
431
432        let mut stats = strategy_write(&self.stats);
433        stats.lookups += 1;
434
435        for ((k1, k2), values) in data.iter() {
436            if k1 == first {
437                if let Some(s) = second {
438                    if k2 == s {
439                        for v in values {
440                            results.push(self.restore_triple(k1, k2, v));
441                        }
442                    }
443                } else {
444                    for v in values {
445                        results.push(self.restore_triple(k1, k2, v));
446                    }
447                }
448            }
449        }
450
451        results
452    }
453
454    /// Scan all entries
455    pub fn scan(&self) -> Vec<(String, String, String)> {
456        let data = strategy_read(&self.data);
457        let mut results = Vec::new();
458
459        let mut stats = strategy_write(&self.stats);
460        stats.scans += 1;
461
462        for ((k1, k2), values) in data.iter() {
463            for v in values {
464                results.push(self.restore_triple(k1, k2, v));
465            }
466        }
467
468        results
469    }
470
471    /// Get statistics
472    pub fn stats(&self) -> IndexStats {
473        strategy_read(&self.stats).clone()
474    }
475
476    /// Order triple according to index type
477    fn order_triple<'a>(&self, s: &'a str, p: &'a str, o: &'a str) -> (&'a str, &'a str, &'a str) {
478        let parts = [s, p, o];
479        let (i1, i2, i3) = self.index_type.key_order();
480        (parts[i1], parts[i2], parts[i3])
481    }
482
483    /// Restore original triple order from index order
484    fn restore_triple(&self, k1: &str, k2: &str, v: &str) -> (String, String, String) {
485        let (i1, i2, i3) = self.index_type.key_order();
486        let mut result = ["", "", ""];
487        result[i1] = k1;
488        result[i2] = k2;
489        result[i3] = v;
490        (
491            result[0].to_string(),
492            result[1].to_string(),
493            result[2].to_string(),
494        )
495    }
496}
497
498/// Triple store with pluggable strategy
499pub struct TripleStore {
500    /// Indexing strategy
501    strategy: Arc<dyn StoreStrategy>,
502    /// Indexes
503    indexes: RwLock<HashMap<IndexType, Arc<TripleIndex>>>,
504}
505
506impl TripleStore {
507    /// Create with strategy
508    pub fn new(strategy: Arc<dyn StoreStrategy>) -> Self {
509        let mut indexes = HashMap::new();
510
511        // Create indexes according to strategy
512        for &idx_type in strategy.indexes() {
513            indexes.insert(idx_type, Arc::new(TripleIndex::new(idx_type)));
514        }
515
516        Self {
517            strategy,
518            indexes: RwLock::new(indexes),
519        }
520    }
521
522    /// Create with eager strategy
523    pub fn eager() -> Self {
524        Self::new(Arc::new(EagerStoreStrategy::new()))
525    }
526
527    /// Create with lazy strategy
528    pub fn lazy() -> Self {
529        Self::new(Arc::new(LazyStoreStrategy::new()))
530    }
531
532    /// Create with minimal strategy
533    pub fn minimal() -> Self {
534        Self::new(Arc::new(MinimalStoreStrategy::new()))
535    }
536
537    /// Add triple
538    pub fn add(&self, subject: &str, predicate: &str, object: &str) {
539        if self.strategy.index_on_add() {
540            let indexes = strategy_read(&self.indexes);
541            for index in indexes.values() {
542                index.insert(subject, predicate, object);
543            }
544        } else {
545            // Lazy: only update primary index
546            let indexes = strategy_read(&self.indexes);
547            if let Some(primary) = indexes.values().next() {
548                primary.insert(subject, predicate, object);
549            }
550        }
551    }
552
553    /// Query with pattern
554    pub fn query(
555        &self,
556        subject: Option<&str>,
557        predicate: Option<&str>,
558        object: Option<&str>,
559    ) -> Vec<(String, String, String)> {
560        let pattern =
561            PatternType::from_bounds(subject.is_some(), predicate.is_some(), object.is_some());
562
563        let index_type = self.strategy.select_index(pattern);
564        let indexes = strategy_read(&self.indexes);
565
566        if let Some(index) = indexes.get(&index_type) {
567            // Use appropriate lookup based on pattern.
568            //
569            // `PatternType::from_bounds(s, p, o)` encodes which of the three
570            // triple positions are bound. Every `.expect()` below asserts
571            // the invariant that its caller already encoded via the variant
572            // name: `Sub*` means `subject.is_some()`, `*Pre*` means
573            // `predicate.is_some()`, `*Obj` means `object.is_some()`.
574            match pattern {
575                PatternType::SubPreObj => {
576                    // Exact match — all three bound.
577                    let s = subject.expect("invariant: PatternType::SubPreObj => subject set");
578                    let p = predicate.expect("invariant: PatternType::SubPreObj => predicate set");
579                    let o = object.expect("invariant: PatternType::SubPreObj => object set");
580                    index
581                        .lookup(s, Some(p))
582                        .into_iter()
583                        .filter(|(_, _, triple_o)| triple_o == o)
584                        .collect()
585                }
586                PatternType::SubPre => {
587                    let s = subject.expect("invariant: PatternType::SubPre => subject set");
588                    let p = predicate.expect("invariant: PatternType::SubPre => predicate set");
589                    index.lookup(s, Some(p))
590                }
591                PatternType::Sub => {
592                    let s = subject.expect("invariant: PatternType::Sub => subject set");
593                    index.lookup(s, None)
594                }
595                _ => {
596                    // Fall back to scan with filter
597                    index
598                        .scan()
599                        .into_iter()
600                        .filter(|(s, p, o)| {
601                            subject.is_none_or(|sub| s == sub)
602                                && predicate.is_none_or(|pre| p == pre)
603                                && object.is_none_or(|obj| o == obj)
604                        })
605                        .collect()
606                }
607            }
608        } else {
609            Vec::new()
610        }
611    }
612
613    /// Get strategy name
614    pub fn strategy_name(&self) -> &str {
615        self.strategy.name()
616    }
617
618    /// Get index statistics
619    pub fn index_stats(&self) -> HashMap<IndexType, IndexStats> {
620        let indexes = strategy_read(&self.indexes);
621        indexes.iter().map(|(&k, v)| (k, v.stats())).collect()
622    }
623}
624
625#[cfg(test)]
626mod tests {
627    use super::*;
628
629    #[test]
630    fn test_pattern_type() {
631        assert_eq!(
632            PatternType::from_bounds(true, true, true),
633            PatternType::SubPreObj
634        );
635        assert_eq!(
636            PatternType::from_bounds(true, false, false),
637            PatternType::Sub
638        );
639        assert_eq!(
640            PatternType::from_bounds(false, false, false),
641            PatternType::Any
642        );
643    }
644
645    #[test]
646    fn test_eager_strategy() {
647        let strategy = EagerStoreStrategy::new();
648        assert_eq!(strategy.name(), "Eager");
649        assert!(strategy.index_on_add());
650        assert_eq!(strategy.indexes().len(), 3);
651    }
652
653    #[test]
654    fn test_minimal_strategy() {
655        let strategy = MinimalStoreStrategy::new();
656        assert_eq!(strategy.name(), "Minimal");
657        assert_eq!(strategy.indexes().len(), 1);
658    }
659
660    #[test]
661    fn test_triple_index() {
662        let index = TripleIndex::new(IndexType::SPO);
663
664        index.insert("alice", "knows", "bob");
665        index.insert("alice", "knows", "charlie");
666        index.insert("bob", "knows", "alice");
667
668        let results = index.lookup("alice", None);
669        assert_eq!(results.len(), 2);
670
671        let results = index.lookup("alice", Some("knows"));
672        assert_eq!(results.len(), 2);
673    }
674
675    #[test]
676    fn test_triple_store_eager() {
677        let store = TripleStore::eager();
678
679        store.add("alice", "knows", "bob");
680        store.add("alice", "likes", "coffee");
681        store.add("bob", "knows", "charlie");
682
683        let results = store.query(Some("alice"), None, None);
684        assert_eq!(results.len(), 2);
685
686        let results = store.query(Some("alice"), Some("knows"), None);
687        assert_eq!(results.len(), 1);
688
689        let results = store.query(None, Some("knows"), None);
690        assert_eq!(results.len(), 2);
691    }
692
693    #[test]
694    fn test_triple_store_minimal() {
695        let store = TripleStore::minimal();
696
697        store.add("alice", "knows", "bob");
698        store.add("bob", "knows", "charlie");
699
700        // Minimal still works but may be slower for some patterns
701        let results = store.query(Some("alice"), None, None);
702        assert_eq!(results.len(), 1);
703    }
704
705    #[test]
706    fn test_index_type_ordering() {
707        let spo = IndexType::SPO;
708        assert_eq!(spo.key_order(), (0, 1, 2));
709
710        let pos = IndexType::POS;
711        assert_eq!(pos.key_order(), (1, 2, 0));
712    }
713
714    #[test]
715    fn test_pattern_selectivity() {
716        assert!(PatternType::SubPreObj.selectivity() < PatternType::Sub.selectivity());
717        assert!(PatternType::Sub.selectivity() < PatternType::Any.selectivity());
718    }
719
720    #[test]
721    fn test_lazy_strategy_recovers_after_materialized_lock_poisoning() {
722        let strategy = Arc::new(LazyStoreStrategy::new());
723        let poison_target = Arc::clone(&strategy);
724        let _ = std::thread::spawn(move || {
725            let _guard = poison_target
726                .materialized
727                .write()
728                .expect("materialized lock should be acquired");
729            panic!("poison materialized lock");
730        })
731        .join();
732
733        strategy.mark_materialized(IndexType::POS);
734        assert!(strategy.is_materialized(IndexType::POS));
735    }
736
737    #[test]
738    fn test_triple_store_recovers_after_indexes_lock_poisoning() {
739        let store = Arc::new(TripleStore::eager());
740        let poison_target = Arc::clone(&store);
741        let _ = std::thread::spawn(move || {
742            let _guard = poison_target
743                .indexes
744                .write()
745                .expect("indexes lock should be acquired");
746            panic!("poison indexes lock");
747        })
748        .join();
749
750        store.add("alice", "knows", "bob");
751        let results = store.query(Some("alice"), Some("knows"), Some("bob"));
752        assert_eq!(results.len(), 1);
753        assert!(!store.index_stats().is_empty());
754    }
755}