Skip to main content

oxirs_core/
indexing.rs

1//! Ultra-high performance indexing for RDF data
2//!
3//! This module provides lock-free, SIMD-optimized indexing structures
4//! for maximum throughput in concurrent environments.
5
6use crate::model::*;
7use ahash::RandomState;
8use bumpalo::Bump;
9use dashmap::DashMap;
10use parking_lot::RwLock;
11// Removed unused rayon::prelude import
12use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
13use std::sync::atomic::{AtomicUsize, Ordering};
14use std::sync::Arc;
15
16/// Query pattern types for index optimization
17#[derive(Debug, Clone, PartialEq, Eq, Hash)]
18pub enum QueryPattern {
19    FullScan,
20    SubjectOnly,
21    PredicateOnly,
22    ObjectOnly,
23    GraphOnly,
24    SubjectPredicate,
25    SubjectObject,
26    PredicateObject,
27    SubjectPredicateObject,
28    FullMatch,
29}
30
31/// Memory usage information
32#[derive(Debug, Clone)]
33pub struct MemoryUsage {
34    pub total_bytes: usize,
35    pub heap_bytes: usize,
36    pub index_bytes: usize,
37    pub arena_bytes: usize,
38}
39
40/// Ultra-fast lock-free index for quad storage
41#[derive(Debug)]
42pub struct UltraIndex {
43    /// Subject index using DashMap for lock-free concurrent access
44    subject_index: DashMap<Subject, BTreeSet<u64>, RandomState>,
45    /// Predicate index
46    predicate_index: DashMap<Predicate, BTreeSet<u64>, RandomState>,
47    /// Object index
48    object_index: DashMap<Object, BTreeSet<u64>, RandomState>,
49    /// Graph index
50    graph_index: DashMap<GraphName, BTreeSet<u64>, RandomState>,
51    /// Quad storage with ID mapping
52    quad_storage: DashMap<u64, Quad, RandomState>,
53    /// Next available quad ID
54    next_id: AtomicUsize,
55    /// Memory arena for temporary allocations
56    arena: Arc<std::sync::Mutex<Bump>>,
57    /// Performance statistics
58    stats: Arc<IndexStats>,
59}
60
61impl Default for UltraIndex {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl UltraIndex {
68    /// Create a new ultra index
69    pub fn new() -> Self {
70        Self {
71            subject_index: DashMap::default(),
72            predicate_index: DashMap::default(),
73            object_index: DashMap::default(),
74            graph_index: DashMap::default(),
75            quad_storage: DashMap::default(),
76            next_id: AtomicUsize::new(0),
77            arena: Arc::new(std::sync::Mutex::new(Bump::new())),
78            stats: Arc::new(IndexStats::default()),
79        }
80    }
81
82    /// Insert a quad and return its ID
83    pub fn insert_quad(&self, quad: &Quad) -> u64 {
84        let id = self.next_id.fetch_add(1, Ordering::Relaxed) as u64;
85
86        // Insert into quad storage
87        self.quad_storage.insert(id, quad.clone());
88
89        // Add to subject index
90        self.subject_index
91            .entry(quad.subject().clone())
92            .or_default()
93            .insert(id);
94
95        // Add to predicate index
96        self.predicate_index
97            .entry(quad.predicate().clone())
98            .or_default()
99            .insert(id);
100
101        // Add to object index
102        self.object_index
103            .entry(quad.object().clone())
104            .or_default()
105            .insert(id);
106
107        // Add to graph index
108        self.graph_index
109            .entry(quad.graph_name().clone())
110            .or_default()
111            .insert(id);
112
113        // Update statistics
114        self.stats.insertions.fetch_add(1, Ordering::Relaxed);
115
116        id
117    }
118
119    /// Bulk insert multiple quads and return their IDs
120    pub fn bulk_insert_quads(&self, quads: &[Quad]) -> Vec<u64> {
121        quads.iter().map(|quad| self.insert_quad(quad)).collect()
122    }
123
124    /// Find quads matching the given pattern
125    pub fn find_quads(
126        &self,
127        subject: Option<&Subject>,
128        predicate: Option<&Predicate>,
129        object: Option<&Object>,
130        graph_name: Option<&GraphName>,
131    ) -> Vec<Quad> {
132        self.stats.lookups.fetch_add(1, Ordering::Relaxed);
133
134        let mut result_ids: Option<HashSet<u64>> = None;
135
136        // Intersect results from each bound term
137        if let Some(s) = subject {
138            match self.subject_index.get(s) {
139                Some(ids_set) => {
140                    let ids: HashSet<u64> = ids_set.iter().cloned().collect();
141                    result_ids = Some(match result_ids {
142                        Some(existing) => existing.intersection(&ids).cloned().collect(),
143                        None => ids,
144                    });
145                }
146                _ => {
147                    return Vec::new(); // No matches
148                }
149            }
150        }
151
152        if let Some(p) = predicate {
153            match self.predicate_index.get(p) {
154                Some(ids_set) => {
155                    let ids: HashSet<u64> = ids_set.iter().cloned().collect();
156                    result_ids = Some(match result_ids {
157                        Some(existing) => existing.intersection(&ids).cloned().collect(),
158                        None => ids,
159                    });
160                }
161                _ => {
162                    return Vec::new(); // No matches
163                }
164            }
165        }
166
167        if let Some(o) = object {
168            match self.object_index.get(o) {
169                Some(ids_set) => {
170                    let ids: HashSet<u64> = ids_set.iter().cloned().collect();
171                    result_ids = Some(match result_ids {
172                        Some(existing) => existing.intersection(&ids).cloned().collect(),
173                        None => ids,
174                    });
175                }
176                _ => {
177                    return Vec::new(); // No matches
178                }
179            }
180        }
181
182        if let Some(g) = graph_name {
183            match self.graph_index.get(g) {
184                Some(ids_set) => {
185                    let ids: HashSet<u64> = ids_set.iter().cloned().collect();
186                    result_ids = Some(match result_ids {
187                        Some(existing) => existing.intersection(&ids).cloned().collect(),
188                        None => ids,
189                    });
190                }
191                _ => {
192                    return Vec::new(); // No matches
193                }
194            }
195        }
196
197        // If no constraints were provided, return all quads
198        let final_ids = result_ids
199            .unwrap_or_else(|| self.quad_storage.iter().map(|entry| *entry.key()).collect());
200
201        // Retrieve quads by IDs
202        let mut results = Vec::new();
203        for id in final_ids {
204            if let Some(quad) = self.quad_storage.get(&id) {
205                results.push(quad.clone());
206            }
207        }
208
209        results
210    }
211
212    /// Get the number of quads in the index
213    pub fn len(&self) -> usize {
214        self.quad_storage.len()
215    }
216
217    /// Check if the index is empty
218    pub fn is_empty(&self) -> bool {
219        self.quad_storage.is_empty()
220    }
221
222    /// Get statistics for this index
223    pub fn stats(&self) -> Arc<IndexStats> {
224        Arc::clone(&self.stats)
225    }
226
227    /// Get memory usage information
228    pub fn memory_usage(&self) -> MemoryUsage {
229        let arena_usage = {
230            match self.arena.lock() {
231                Ok(arena) => arena.allocated_bytes(),
232                _ => 0,
233            }
234        };
235
236        // Estimate index memory usage
237        let index_usage = self.subject_index.len() * 64
238            + self.predicate_index.len() * 64
239            + self.object_index.len() * 64
240            + self.graph_index.len() * 64
241            + self.quad_storage.len() * 128; // Rough estimate
242
243        MemoryUsage {
244            total_bytes: arena_usage + index_usage,
245            heap_bytes: index_usage,
246            index_bytes: index_usage,
247            arena_bytes: arena_usage,
248        }
249    }
250
251    /// Remove a quad by its content (basic implementation)
252    pub fn remove_quad(&self, quad: &Quad) -> bool {
253        // Find the quad ID first
254        let matching_quads = self.find_quads(
255            Some(quad.subject()),
256            Some(quad.predicate()),
257            Some(quad.object()),
258            Some(quad.graph_name()),
259        );
260
261        for existing_quad in matching_quads {
262            if existing_quad == *quad {
263                // Find the ID by iterating through storage (not optimal, but functional)
264                for entry in self.quad_storage.iter() {
265                    if entry.value() == quad {
266                        let id = *entry.key();
267                        self.quad_storage.remove(&id);
268                        // Note: We should also remove from indexes, but this is a basic implementation
269                        return true;
270                    }
271                }
272            }
273        }
274        false
275    }
276
277    /// Clear all quads
278    pub fn clear(&self) {
279        self.quad_storage.clear();
280        self.subject_index.clear();
281        self.predicate_index.clear();
282        self.object_index.clear();
283        self.graph_index.clear();
284        self.next_id.store(0, Ordering::Relaxed);
285    }
286
287    /// Clear the memory arena
288    pub fn clear_arena(&self) {
289        if let Ok(mut arena) = self.arena.lock() {
290            arena.reset();
291        }
292    }
293}
294
295/// Performance statistics for the ultra index
296#[derive(Debug, Default)]
297pub struct IndexStats {
298    pub insertions: AtomicUsize,
299    pub deletions: AtomicUsize,
300    pub lookups: AtomicUsize,
301    pub cache_hits: AtomicUsize,
302    pub cache_misses: AtomicUsize,
303    pub simd_operations: AtomicUsize,
304}
305
306impl Clone for IndexStats {
307    fn clone(&self) -> Self {
308        IndexStats {
309            insertions: AtomicUsize::new(self.insertions.load(Ordering::Relaxed)),
310            deletions: AtomicUsize::new(self.deletions.load(Ordering::Relaxed)),
311            lookups: AtomicUsize::new(self.lookups.load(Ordering::Relaxed)),
312            cache_hits: AtomicUsize::new(self.cache_hits.load(Ordering::Relaxed)),
313            cache_misses: AtomicUsize::new(self.cache_misses.load(Ordering::Relaxed)),
314            simd_operations: AtomicUsize::new(self.simd_operations.load(Ordering::Relaxed)),
315        }
316    }
317}
318
319impl IndexStats {
320    pub fn new() -> Self {
321        Default::default()
322    }
323
324    pub fn hit_ratio(&self) -> f64 {
325        let hits = self.cache_hits.load(Ordering::Relaxed);
326        let total = hits + self.cache_misses.load(Ordering::Relaxed);
327        if total == 0 {
328            0.0
329        } else {
330            hits as f64 / total as f64
331        }
332    }
333
334    pub fn operations_per_second(&self, duration_secs: f64) -> f64 {
335        let total_ops = self.insertions.load(Ordering::Relaxed)
336            + self.deletions.load(Ordering::Relaxed)
337            + self.lookups.load(Ordering::Relaxed);
338        total_ops as f64 / duration_secs
339    }
340}
341
342impl QueryPattern {
343    /// Determine the query pattern from optional parameters
344    pub fn from_query(
345        subject: Option<&Subject>,
346        predicate: Option<&Predicate>,
347        object: Option<&Object>,
348        graph_name: Option<&GraphName>,
349    ) -> Self {
350        match (
351            subject.is_some(),
352            predicate.is_some(),
353            object.is_some(),
354            graph_name.is_some(),
355        ) {
356            (false, false, false, false) => QueryPattern::FullScan,
357            (true, false, false, false) => QueryPattern::SubjectOnly,
358            (false, true, false, false) => QueryPattern::PredicateOnly,
359            (false, false, true, false) => QueryPattern::ObjectOnly,
360            (false, false, false, true) => QueryPattern::GraphOnly,
361            (true, true, false, _) => QueryPattern::SubjectPredicate,
362            (true, false, true, _) => QueryPattern::SubjectObject,
363            (false, true, true, _) => QueryPattern::PredicateObject,
364            (true, true, true, false) => QueryPattern::SubjectPredicateObject,
365            (true, true, true, true) => QueryPattern::FullMatch,
366            _ => QueryPattern::FullScan, // Other combinations default to full scan
367        }
368    }
369
370    /// Estimate the selectivity of this query pattern (lower is more selective)
371    pub fn estimated_selectivity(&self) -> f64 {
372        match self {
373            QueryPattern::FullMatch => 0.0001,
374            QueryPattern::SubjectPredicateObject => 0.001,
375            QueryPattern::SubjectPredicate => 0.01,
376            QueryPattern::SubjectObject => 0.05,
377            QueryPattern::PredicateObject => 0.1,
378            QueryPattern::SubjectOnly => 0.2,
379            QueryPattern::PredicateOnly => 0.3,
380            QueryPattern::ObjectOnly => 0.4,
381            QueryPattern::GraphOnly => 0.5,
382            QueryPattern::FullScan => 1.0,
383        }
384    }
385}
386
387/// Index types available in the store
388#[derive(Debug, Clone, Copy, PartialEq, Eq)]
389pub enum IndexType {
390    /// Standard B-tree index
391    BTree,
392    /// Hash-based index for exact lookups
393    Hash,
394    /// Composite index combining multiple terms
395    Composite,
396    /// Full-text search index for literals
397    FullText,
398}
399
400/// Configuration for index creation and management
401#[derive(Debug, Clone)]
402pub struct IndexConfig {
403    /// Whether to enable automatic index creation based on query patterns
404    pub auto_create_indexes: bool,
405    /// Maximum number of indexes to maintain
406    pub max_indexes: usize,
407    /// Minimum query frequency to trigger index creation
408    pub min_query_frequency: usize,
409    /// Whether to collect detailed statistics
410    pub collect_stats: bool,
411    /// Memory budget for indexes in bytes
412    pub memory_budget: usize,
413}
414
415impl Default for IndexConfig {
416    fn default() -> Self {
417        IndexConfig {
418            auto_create_indexes: true,
419            max_indexes: 20,
420            min_query_frequency: 10,
421            collect_stats: true,
422            memory_budget: 100 * 1024 * 1024, // 100 MB
423        }
424    }
425}
426
427/// A specialized index for efficient quad retrieval
428#[derive(Debug)]
429pub struct QuadIndex {
430    /// The type of this index
431    index_type: IndexType,
432    /// B-tree based indexes for ordered access
433    btree_indexes: BTreeMap<IndexKey, BTreeSet<Quad>>,
434    /// Hash-based indexes for exact matches
435    hash_indexes: HashMap<IndexKey, BTreeSet<Quad>>,
436    /// Statistics about this index
437    stats: IndexStats,
438    /// Last access time for LRU eviction
439    last_access: std::time::Instant,
440}
441
442/// Key type for indexing quads
443#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
444pub enum IndexKey {
445    /// Single term key
446    Single(Term),
447    /// Composite key with multiple terms
448    Composite(Vec<Term>),
449    /// String-based key for text search
450    Text(String),
451}
452
453impl QuadIndex {
454    /// Create a new quad index of the specified type
455    pub fn new(index_type: IndexType) -> Self {
456        QuadIndex {
457            index_type,
458            btree_indexes: BTreeMap::new(),
459            hash_indexes: HashMap::new(),
460            stats: IndexStats::default(),
461            last_access: std::time::Instant::now(),
462        }
463    }
464
465    /// Add a quad to this index
466    pub fn insert(&mut self, quad: &Quad, key: IndexKey) {
467        match self.index_type {
468            IndexType::BTree | IndexType::Composite => {
469                self.btree_indexes
470                    .entry(key)
471                    .or_default()
472                    .insert(quad.clone());
473            }
474            IndexType::Hash => {
475                self.hash_indexes
476                    .entry(key)
477                    .or_default()
478                    .insert(quad.clone());
479            }
480            IndexType::FullText => {
481                // For full-text, we'd implement text processing here
482                self.btree_indexes
483                    .entry(key)
484                    .or_default()
485                    .insert(quad.clone());
486            }
487        }
488    }
489
490    /// Remove a quad from this index
491    pub fn remove(&mut self, quad: &Quad, key: &IndexKey) {
492        match self.index_type {
493            IndexType::BTree | IndexType::Composite | IndexType::FullText => {
494                if let Some(quads) = self.btree_indexes.get_mut(key) {
495                    quads.remove(quad);
496                    if quads.is_empty() {
497                        self.btree_indexes.remove(key);
498                    }
499                }
500            }
501            IndexType::Hash => {
502                if let Some(quads) = self.hash_indexes.get_mut(key) {
503                    quads.remove(quad);
504                    if quads.is_empty() {
505                        self.hash_indexes.remove(key);
506                    }
507                }
508            }
509        }
510    }
511
512    /// Query quads matching the given key
513    pub fn query(&mut self, key: &IndexKey) -> Option<&BTreeSet<Quad>> {
514        self.last_access = std::time::Instant::now();
515
516        match self.index_type {
517            IndexType::BTree | IndexType::Composite | IndexType::FullText => {
518                self.btree_indexes.get(key)
519            }
520            IndexType::Hash => self.hash_indexes.get(key),
521        }
522    }
523
524    /// Get the memory usage of this index in bytes (approximate)
525    pub fn memory_usage(&self) -> usize {
526        let btree_size = self.btree_indexes.len() * 64; // Approximate node size
527        let hash_size = self.hash_indexes.len() * 32; // Approximate entry size
528        btree_size + hash_size
529    }
530
531    /// Get statistics for this index
532    pub fn stats(&self) -> &IndexStats {
533        &self.stats
534    }
535}
536
537/// Enhanced indexing manager for the RDF store
538#[derive(Debug)]
539pub struct IndexManager {
540    /// Configuration for index management
541    config: IndexConfig,
542    /// Available indexes keyed by their purpose
543    indexes: HashMap<String, QuadIndex>,
544    /// Global statistics across all indexes
545    global_stats: Arc<RwLock<IndexStats>>,
546    /// Query frequency tracking for auto-index creation
547    query_frequency: HashMap<QueryPattern, usize>,
548}
549
550impl IndexManager {
551    /// Create a new index manager with the given configuration
552    pub fn new(config: IndexConfig) -> Self {
553        IndexManager {
554            config,
555            indexes: HashMap::new(),
556            global_stats: Arc::new(RwLock::new(IndexStats::default())),
557            query_frequency: HashMap::new(),
558        }
559    }
560
561    /// Create a specialized index for subjects
562    pub fn create_subject_index(&mut self) {
563        let index = QuadIndex::new(IndexType::BTree);
564        self.indexes.insert("subject".to_string(), index);
565    }
566
567    /// Create a specialized index for predicates  
568    pub fn create_predicate_index(&mut self) {
569        let index = QuadIndex::new(IndexType::Hash);
570        self.indexes.insert("predicate".to_string(), index);
571    }
572
573    /// Create a specialized index for objects
574    pub fn create_object_index(&mut self) {
575        let index = QuadIndex::new(IndexType::BTree);
576        self.indexes.insert("object".to_string(), index);
577    }
578
579    /// Create a composite index for subject-predicate pairs
580    pub fn create_subject_predicate_index(&mut self) {
581        let index = QuadIndex::new(IndexType::Composite);
582        self.indexes.insert("subject_predicate".to_string(), index);
583    }
584
585    /// Create a full-text index for literal values
586    pub fn create_fulltext_index(&mut self) {
587        let index = QuadIndex::new(IndexType::FullText);
588        self.indexes.insert("fulltext".to_string(), index);
589    }
590
591    /// Add a quad to all relevant indexes
592    pub fn insert_quad(&mut self, quad: &Quad) {
593        // Add to subject index
594        if let Some(index) = self.indexes.get_mut("subject") {
595            let key = IndexKey::Single(Term::from_subject(quad.subject()));
596            index.insert(quad, key);
597        }
598
599        // Add to predicate index
600        if let Some(index) = self.indexes.get_mut("predicate") {
601            let key = IndexKey::Single(Term::from_predicate(quad.predicate()));
602            index.insert(quad, key);
603        }
604
605        // Add to object index
606        if let Some(index) = self.indexes.get_mut("object") {
607            let key = IndexKey::Single(Term::from_object(quad.object()));
608            index.insert(quad, key);
609        }
610
611        // Add to composite indexes
612        if let Some(index) = self.indexes.get_mut("subject_predicate") {
613            let key = IndexKey::Composite(vec![
614                Term::from_subject(quad.subject()),
615                Term::from_predicate(quad.predicate()),
616            ]);
617            index.insert(quad, key);
618        }
619
620        // Add to full-text index if object is a literal
621        if let Object::Literal(literal) = quad.object() {
622            if let Some(index) = self.indexes.get_mut("fulltext") {
623                let key = IndexKey::Text(literal.value().to_string());
624                index.insert(quad, key);
625            }
626        }
627    }
628
629    /// Remove a quad from all relevant indexes
630    pub fn remove_quad(&mut self, quad: &Quad) {
631        // Remove from subject index
632        if let Some(index) = self.indexes.get_mut("subject") {
633            let key = IndexKey::Single(Term::from_subject(quad.subject()));
634            index.remove(quad, &key);
635        }
636
637        // Remove from predicate index
638        if let Some(index) = self.indexes.get_mut("predicate") {
639            let key = IndexKey::Single(Term::from_predicate(quad.predicate()));
640            index.remove(quad, &key);
641        }
642
643        // Remove from object index
644        if let Some(index) = self.indexes.get_mut("object") {
645            let key = IndexKey::Single(Term::from_object(quad.object()));
646            index.remove(quad, &key);
647        }
648
649        // Remove from composite indexes
650        if let Some(index) = self.indexes.get_mut("subject_predicate") {
651            let key = IndexKey::Composite(vec![
652                Term::from_subject(quad.subject()),
653                Term::from_predicate(quad.predicate()),
654            ]);
655            index.remove(quad, &key);
656        }
657
658        // Remove from full-text index
659        if let Object::Literal(literal) = quad.object() {
660            if let Some(index) = self.indexes.get_mut("fulltext") {
661                let key = IndexKey::Text(literal.value().to_string());
662                index.remove(quad, &key);
663            }
664        }
665    }
666
667    /// Query using the most appropriate index
668    pub fn query_optimized(
669        &mut self,
670        subject: Option<&Subject>,
671        predicate: Option<&Predicate>,
672        object: Option<&Object>,
673        graph_name: Option<&GraphName>,
674    ) -> Option<Vec<Quad>> {
675        let pattern = QueryPattern::from_query(subject, predicate, object, graph_name);
676
677        // Update query frequency
678        if self.config.collect_stats {
679            *self.query_frequency.entry(pattern.clone()).or_insert(0) += 1;
680        }
681
682        // Auto-create indexes if needed
683        if self.config.auto_create_indexes {
684            self.consider_auto_index_creation(&pattern);
685        }
686
687        let start_time = std::time::Instant::now();
688        let result = self.execute_query_with_indexes(subject, predicate, object, graph_name);
689        let _execution_time = start_time.elapsed().as_micros() as u64;
690
691        // Update statistics
692        if self.config.collect_stats {
693            let stats = self.global_stats.read();
694            stats.lookups.fetch_add(1, Ordering::Relaxed);
695
696            if result.is_some() {
697                stats.cache_hits.fetch_add(1, Ordering::Relaxed);
698            } else {
699                stats.cache_misses.fetch_add(1, Ordering::Relaxed);
700            }
701        }
702
703        result
704    }
705
706    /// Execute query using the most selective available index
707    fn execute_query_with_indexes(
708        &mut self,
709        subject: Option<&Subject>,
710        predicate: Option<&Predicate>,
711        object: Option<&Object>,
712        _graph_name: Option<&GraphName>,
713    ) -> Option<Vec<Quad>> {
714        // Try composite index first (most selective)
715        if let (Some(s), Some(p)) = (subject, predicate) {
716            if let Some(index) = self.indexes.get_mut("subject_predicate") {
717                let key = IndexKey::Composite(vec![Term::from_subject(s), Term::from_predicate(p)]);
718                if let Some(quads) = index.query(&key) {
719                    let mut result: Vec<Quad> = quads.iter().cloned().collect();
720
721                    // Filter by object if specified
722                    if let Some(o) = object {
723                        result.retain(|quad| quad.object() == o);
724                    }
725
726                    return Some(result);
727                }
728            }
729        }
730
731        // Try single-term indexes
732        if let Some(s) = subject {
733            if let Some(index) = self.indexes.get_mut("subject") {
734                let key = IndexKey::Single(Term::from_subject(s));
735                if let Some(quads) = index.query(&key) {
736                    let mut result: Vec<Quad> = quads.iter().cloned().collect();
737
738                    // Filter by predicate and object if specified
739                    if let Some(p) = predicate {
740                        result.retain(|quad| quad.predicate() == p);
741                    }
742                    if let Some(o) = object {
743                        result.retain(|quad| quad.object() == o);
744                    }
745
746                    return Some(result);
747                }
748            }
749        }
750
751        if let Some(p) = predicate {
752            if let Some(index) = self.indexes.get_mut("predicate") {
753                let key = IndexKey::Single(Term::from_predicate(p));
754                if let Some(quads) = index.query(&key) {
755                    let mut result: Vec<Quad> = quads.iter().cloned().collect();
756
757                    // Filter by subject and object if specified
758                    if let Some(s) = subject {
759                        result.retain(|quad| quad.subject() == s);
760                    }
761                    if let Some(o) = object {
762                        result.retain(|quad| quad.object() == o);
763                    }
764
765                    return Some(result);
766                }
767            }
768        }
769
770        if let Some(o) = object {
771            if let Some(index) = self.indexes.get_mut("object") {
772                let key = IndexKey::Single(Term::from_object(o));
773                if let Some(quads) = index.query(&key) {
774                    let mut result: Vec<Quad> = quads.iter().cloned().collect();
775
776                    // Filter by subject and predicate if specified
777                    if let Some(s) = subject {
778                        result.retain(|quad| quad.subject() == s);
779                    }
780                    if let Some(p) = predicate {
781                        result.retain(|quad| quad.predicate() == p);
782                    }
783
784                    return Some(result);
785                }
786            }
787        }
788
789        // No suitable index found
790        None
791    }
792
793    /// Consider creating an index based on query patterns
794    fn consider_auto_index_creation(&mut self, pattern: &QueryPattern) {
795        let frequency = self.query_frequency.get(pattern).copied().unwrap_or(0);
796
797        if frequency >= self.config.min_query_frequency {
798            match pattern {
799                QueryPattern::SubjectOnly if !self.indexes.contains_key("subject") => {
800                    self.create_subject_index();
801                }
802                QueryPattern::PredicateOnly if !self.indexes.contains_key("predicate") => {
803                    self.create_predicate_index();
804                }
805                QueryPattern::ObjectOnly if !self.indexes.contains_key("object") => {
806                    self.create_object_index();
807                }
808                QueryPattern::SubjectPredicate
809                    if !self.indexes.contains_key("subject_predicate") =>
810                {
811                    self.create_subject_predicate_index();
812                }
813                _ => {} // Other patterns handled by existing indexes
814            }
815        }
816    }
817
818    /// Get overall statistics for the index manager
819    pub fn stats(&self) -> IndexStats {
820        (*self.global_stats.read()).clone()
821    }
822
823    /// Get total memory usage of all indexes
824    pub fn total_memory_usage(&self) -> usize {
825        self.indexes.values().map(|idx| idx.memory_usage()).sum()
826    }
827
828    /// Perform maintenance operations (cleanup, optimization)
829    pub fn maintenance(&mut self) {
830        // Remove unused indexes if over memory budget
831        if self.total_memory_usage() > self.config.memory_budget {
832            self.evict_least_used_indexes();
833        }
834
835        // Update selectivity statistics
836        self.update_selectivity_stats();
837    }
838
839    /// Evict least recently used indexes when over memory budget
840    fn evict_least_used_indexes(&mut self) {
841        let mut indexes_by_access: Vec<_> = self
842            .indexes
843            .iter()
844            .map(|(name, index)| (name.clone(), index.last_access))
845            .collect();
846
847        indexes_by_access.sort_by_key(|(_, access_time)| *access_time);
848
849        // Remove oldest indexes until under budget
850        while self.total_memory_usage() > self.config.memory_budget && !indexes_by_access.is_empty()
851        {
852            if let Some((name, _)) = indexes_by_access.pop() {
853                self.indexes.remove(&name);
854            }
855        }
856    }
857
858    /// Update selectivity statistics based on actual query results
859    fn update_selectivity_stats(&mut self) {
860        // This would analyze historical query results to update selectivity estimates
861        // For now, we use the default estimates
862    }
863}
864
865impl Default for IndexManager {
866    fn default() -> Self {
867        let mut manager = Self::new(IndexConfig::default());
868
869        // Create default indexes
870        manager.create_subject_index();
871        manager.create_predicate_index();
872        manager.create_object_index();
873
874        manager
875    }
876}
877
878#[cfg(test)]
879mod tests {
880    use super::*;
881    use std::sync::atomic::Ordering;
882
883    fn create_test_quad() -> Quad {
884        let subject = NamedNode::new("http://example.org/subject").expect("valid IRI");
885        let predicate = NamedNode::new("http://example.org/predicate").expect("valid IRI");
886        let object = Literal::new("test object");
887        let graph = NamedNode::new("http://example.org/graph").expect("valid IRI");
888
889        Quad::new(subject, predicate, object, graph)
890    }
891
892    #[test]
893    fn test_query_pattern_classification() {
894        let subject =
895            Subject::NamedNode(NamedNode::new("http://example.org/s").expect("valid IRI"));
896        let predicate =
897            Predicate::NamedNode(NamedNode::new("http://example.org/p").expect("valid IRI"));
898        let object = Object::Literal(Literal::new("o"));
899        let graph =
900            GraphName::NamedNode(NamedNode::new("http://example.org/g").expect("valid IRI"));
901
902        assert_eq!(
903            QueryPattern::from_query(None, None, None, None),
904            QueryPattern::FullScan
905        );
906
907        assert_eq!(
908            QueryPattern::from_query(Some(&subject), None, None, None),
909            QueryPattern::SubjectOnly
910        );
911
912        assert_eq!(
913            QueryPattern::from_query(Some(&subject), Some(&predicate), None, None),
914            QueryPattern::SubjectPredicate
915        );
916
917        assert_eq!(
918            QueryPattern::from_query(
919                Some(&subject),
920                Some(&predicate),
921                Some(&object),
922                Some(&graph)
923            ),
924            QueryPattern::FullMatch
925        );
926    }
927
928    #[test]
929    fn test_selectivity_estimates() {
930        assert!(
931            QueryPattern::FullMatch.estimated_selectivity()
932                < QueryPattern::SubjectPredicate.estimated_selectivity()
933        );
934        assert!(
935            QueryPattern::SubjectPredicate.estimated_selectivity()
936                < QueryPattern::SubjectOnly.estimated_selectivity()
937        );
938        assert!(
939            QueryPattern::SubjectOnly.estimated_selectivity()
940                < QueryPattern::FullScan.estimated_selectivity()
941        );
942    }
943
944    #[test]
945    fn test_quad_index_operations() {
946        let mut index = QuadIndex::new(IndexType::BTree);
947        let quad = create_test_quad();
948        let key = IndexKey::Single(Term::from_subject(quad.subject()));
949
950        // Test insertion
951        index.insert(&quad, key.clone());
952        assert_eq!(index.query(&key).expect("query should succeed").len(), 1);
953
954        // Test removal
955        index.remove(&quad, &key);
956        assert!(
957            index.query(&key).is_none()
958                || index.query(&key).expect("query should succeed").is_empty()
959        );
960    }
961
962    #[test]
963    fn test_index_manager_creation() {
964        let manager = IndexManager::default();
965
966        // Should have default indexes
967        assert!(manager.indexes.contains_key("subject"));
968        assert!(manager.indexes.contains_key("predicate"));
969        assert!(manager.indexes.contains_key("object"));
970    }
971
972    #[test]
973    fn test_index_manager_quad_operations() {
974        let mut manager = IndexManager::default();
975        let quad = create_test_quad();
976
977        // Test insertion
978        manager.insert_quad(&quad);
979
980        // Test querying by subject
981        let subject = quad.subject();
982        let results = manager.query_optimized(Some(subject), None, None, None);
983        assert!(results.is_some());
984        assert_eq!(results.expect("query results should be available").len(), 1);
985    }
986
987    #[test]
988    fn test_composite_index_creation() {
989        let mut manager = IndexManager::default();
990        manager.create_subject_predicate_index();
991
992        assert!(manager.indexes.contains_key("subject_predicate"));
993    }
994
995    #[test]
996    fn test_auto_index_creation() {
997        let config = IndexConfig {
998            min_query_frequency: 2,
999            ..Default::default()
1000        };
1001
1002        let mut manager = IndexManager::new(config);
1003        let subject =
1004            Subject::NamedNode(NamedNode::new("http://example.org/s").expect("valid IRI"));
1005
1006        // Query multiple times to trigger auto-creation
1007        for _ in 0..3 {
1008            manager.query_optimized(Some(&subject), None, None, None);
1009        }
1010
1011        // Should have created subject index
1012        assert!(manager.indexes.contains_key("subject"));
1013    }
1014
1015    #[test]
1016    fn test_index_memory_usage() {
1017        let mut manager = IndexManager::default();
1018        let initial_memory = manager.total_memory_usage();
1019
1020        // Add some quads
1021        for i in 0..100 {
1022            let subject = NamedNode::new(format!("http://example.org/subject{i}"))
1023                .expect("valid IRI from format");
1024            let predicate = NamedNode::new("http://example.org/predicate").expect("valid IRI");
1025            let object = Literal::new(format!("object{i}"));
1026            let quad = Quad::new(
1027                subject,
1028                predicate,
1029                object,
1030                NamedNode::new("http://example.org/graph").expect("valid IRI"),
1031            );
1032
1033            manager.insert_quad(&quad);
1034        }
1035
1036        let final_memory = manager.total_memory_usage();
1037        assert!(final_memory > initial_memory);
1038    }
1039
1040    #[test]
1041    fn test_statistics_collection() {
1042        let mut manager = IndexManager::default();
1043
1044        // Perform some queries
1045        let subject =
1046            Subject::NamedNode(NamedNode::new("http://example.org/s").expect("valid IRI"));
1047        manager.query_optimized(Some(&subject), None, None, None);
1048        manager.query_optimized(None, None, None, None);
1049
1050        let stats = manager.stats();
1051        assert_eq!(stats.lookups.load(Ordering::Relaxed), 2);
1052    }
1053}