Skip to main content

oxirs_arq/
adaptive_index_advisor.rs

1//! # Adaptive Index Advisor
2//!
3//! This module provides intelligent index recommendations based on query workload analysis.
4//! It analyzes query patterns, tracks index usage, and suggests optimal index configurations.
5//!
6//! ## Features
7//!
8//! - **Query Pattern Analysis**: Analyzes triple patterns to identify indexing opportunities
9//! - **Workload-Based Recommendations**: Learns from query history to suggest beneficial indexes
10//! - **Index Benefit Estimation**: Estimates performance improvements for proposed indexes
11//! - **Index Consolidation**: Identifies redundant or overlapping indexes
12//! - **Cost-Benefit Analysis**: Balances index maintenance cost against query performance gains
13//!
14//! ## Quick Start
15//!
16//! ```rust,ignore
17//! use oxirs_arq::adaptive_index_advisor::{
18//!     IndexAdvisor, AdvisorConfig, QueryPattern, IndexRecommendation,
19//! };
20//!
21//! // Create an index advisor
22//! let config = AdvisorConfig::default();
23//! let mut advisor = IndexAdvisor::new(config);
24//!
25//! // Record query patterns
26//! advisor.record_query("SELECT * WHERE { ?s :knows ?o }");
27//!
28//! // Get recommendations
29//! let recommendations = advisor.get_recommendations();
30//! ```
31
32use std::collections::{HashMap, HashSet};
33use std::fmt;
34use std::time::SystemTime;
35
36/// Index type for RDF data
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
38pub enum IndexType {
39    /// Subject-Predicate-Object ordering
40    SPO,
41    /// Subject-Object-Predicate ordering
42    SOP,
43    /// Predicate-Subject-Object ordering
44    PSO,
45    /// Predicate-Object-Subject ordering
46    POS,
47    /// Object-Subject-Predicate ordering
48    OSP,
49    /// Object-Predicate-Subject ordering
50    OPS,
51}
52
53impl fmt::Display for IndexType {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        match self {
56            Self::SPO => write!(f, "SPO"),
57            Self::SOP => write!(f, "SOP"),
58            Self::PSO => write!(f, "PSO"),
59            Self::POS => write!(f, "POS"),
60            Self::OSP => write!(f, "OSP"),
61            Self::OPS => write!(f, "OPS"),
62        }
63    }
64}
65
66impl IndexType {
67    /// Get all possible index types
68    pub fn all() -> Vec<IndexType> {
69        vec![
70            Self::SPO,
71            Self::SOP,
72            Self::PSO,
73            Self::POS,
74            Self::OSP,
75            Self::OPS,
76        ]
77    }
78
79    /// Check if this index covers the given access pattern
80    /// An index is useful when its primary component is bound
81    pub fn covers_pattern(&self, pattern: &AccessPattern) -> bool {
82        match self {
83            // SPO/SOP indexes are useful when subject is bound
84            Self::SPO | Self::SOP => pattern.has_subject,
85            // PSO/POS indexes are useful when predicate is bound
86            Self::PSO | Self::POS => pattern.has_predicate,
87            // OSP/OPS indexes are useful when object is bound
88            Self::OSP | Self::OPS => pattern.has_object,
89        }
90    }
91
92    /// Get the selectivity order for this index
93    pub fn primary_component(&self) -> PatternComponent {
94        match self {
95            Self::SPO | Self::SOP => PatternComponent::Subject,
96            Self::PSO | Self::POS => PatternComponent::Predicate,
97            Self::OSP | Self::OPS => PatternComponent::Object,
98        }
99    }
100}
101
102/// Components of a triple pattern
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
104pub enum PatternComponent {
105    Subject,
106    Predicate,
107    Object,
108}
109
110impl fmt::Display for PatternComponent {
111    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        match self {
113            Self::Subject => write!(f, "subject"),
114            Self::Predicate => write!(f, "predicate"),
115            Self::Object => write!(f, "object"),
116        }
117    }
118}
119
120/// Access pattern extracted from a query
121#[derive(Debug, Clone, PartialEq, Eq, Hash)]
122pub struct AccessPattern {
123    /// Whether subject is bound (constant)
124    pub has_subject: bool,
125    /// Whether predicate is bound (constant)
126    pub has_predicate: bool,
127    /// Whether object is bound (constant)
128    pub has_object: bool,
129    /// Optional predicate value if known
130    pub predicate_value: Option<String>,
131}
132
133impl AccessPattern {
134    /// Create a new access pattern
135    pub fn new(has_subject: bool, has_predicate: bool, has_object: bool) -> Self {
136        Self {
137            has_subject,
138            has_predicate,
139            has_object,
140            predicate_value: None,
141        }
142    }
143
144    /// Create with predicate value
145    pub fn with_predicate(mut self, predicate: impl Into<String>) -> Self {
146        self.predicate_value = Some(predicate.into());
147        self
148    }
149
150    /// Get pattern signature for grouping
151    pub fn signature(&self) -> String {
152        format!(
153            "{}{}{}",
154            if self.has_subject { "S" } else { "?" },
155            if self.has_predicate { "P" } else { "?" },
156            if self.has_object { "O" } else { "?" }
157        )
158    }
159
160    /// Count bound components
161    pub fn bound_count(&self) -> usize {
162        let mut count = 0;
163        if self.has_subject {
164            count += 1;
165        }
166        if self.has_predicate {
167            count += 1;
168        }
169        if self.has_object {
170            count += 1;
171        }
172        count
173    }
174
175    /// Best index type for this pattern
176    pub fn best_index(&self) -> Option<IndexType> {
177        match (self.has_subject, self.has_predicate, self.has_object) {
178            (true, true, true) => Some(IndexType::SPO),
179            (true, true, false) => Some(IndexType::SPO),
180            (true, false, true) => Some(IndexType::SOP),
181            (true, false, false) => Some(IndexType::SPO),
182            (false, true, true) => Some(IndexType::POS),
183            (false, true, false) => Some(IndexType::PSO),
184            (false, false, true) => Some(IndexType::OSP),
185            (false, false, false) => None, // Full scan needed
186        }
187    }
188}
189
190impl fmt::Display for AccessPattern {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        write!(f, "{}", self.signature())
193    }
194}
195
196/// Query pattern with associated metadata
197#[derive(Debug, Clone)]
198pub struct QueryPattern {
199    /// The access pattern
200    pub access_pattern: AccessPattern,
201    /// Number of times this pattern was observed
202    pub frequency: usize,
203    /// Average selectivity (0.0 - 1.0)
204    pub avg_selectivity: f64,
205    /// Average execution time in milliseconds
206    pub avg_execution_time_ms: f64,
207    /// Last observed timestamp
208    pub last_seen: SystemTime,
209}
210
211impl QueryPattern {
212    /// Create a new query pattern
213    pub fn new(access_pattern: AccessPattern) -> Self {
214        Self {
215            access_pattern,
216            frequency: 1,
217            avg_selectivity: 1.0,
218            avg_execution_time_ms: 0.0,
219            last_seen: SystemTime::now(),
220        }
221    }
222
223    /// Update with new observation
224    pub fn update(&mut self, selectivity: f64, execution_time_ms: f64) {
225        let n = self.frequency as f64;
226        self.avg_selectivity = (self.avg_selectivity * n + selectivity) / (n + 1.0);
227        self.avg_execution_time_ms =
228            (self.avg_execution_time_ms * n + execution_time_ms) / (n + 1.0);
229        self.frequency += 1;
230        self.last_seen = SystemTime::now();
231    }
232}
233
234/// Configuration for the index advisor
235#[derive(Debug, Clone)]
236pub struct AdvisorConfig {
237    /// Minimum frequency for a pattern to be considered
238    pub min_pattern_frequency: usize,
239    /// Maximum number of recommendations to generate
240    pub max_recommendations: usize,
241    /// Weight for query frequency in benefit calculation
242    pub frequency_weight: f64,
243    /// Weight for execution time in benefit calculation
244    pub execution_time_weight: f64,
245    /// Minimum benefit score to recommend an index
246    pub min_benefit_score: f64,
247    /// Consider index maintenance cost
248    pub consider_maintenance_cost: bool,
249    /// Decay factor for old patterns (0.0-1.0)
250    pub time_decay_factor: f64,
251    /// Maximum patterns to track
252    pub max_tracked_patterns: usize,
253}
254
255impl Default for AdvisorConfig {
256    fn default() -> Self {
257        Self {
258            min_pattern_frequency: 5,
259            max_recommendations: 10,
260            frequency_weight: 0.4,
261            execution_time_weight: 0.6,
262            min_benefit_score: 0.1,
263            consider_maintenance_cost: true,
264            time_decay_factor: 0.95,
265            max_tracked_patterns: 1000,
266        }
267    }
268}
269
270impl AdvisorConfig {
271    /// Conservative configuration for production
272    pub fn conservative() -> Self {
273        Self {
274            min_pattern_frequency: 10,
275            max_recommendations: 5,
276            frequency_weight: 0.3,
277            execution_time_weight: 0.7,
278            min_benefit_score: 0.3,
279            consider_maintenance_cost: true,
280            time_decay_factor: 0.90,
281            max_tracked_patterns: 500,
282        }
283    }
284
285    /// Aggressive configuration for optimization
286    pub fn aggressive() -> Self {
287        Self {
288            min_pattern_frequency: 3,
289            max_recommendations: 15,
290            frequency_weight: 0.5,
291            execution_time_weight: 0.5,
292            min_benefit_score: 0.05,
293            consider_maintenance_cost: false,
294            time_decay_factor: 0.98,
295            max_tracked_patterns: 2000,
296        }
297    }
298}
299
300/// Index recommendation priority
301#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
302pub enum RecommendationPriority {
303    /// Critical - significant performance impact
304    Critical,
305    /// High - notable improvement expected
306    High,
307    /// Medium - moderate improvement expected
308    Medium,
309    /// Low - minor improvement expected
310    Low,
311}
312
313impl fmt::Display for RecommendationPriority {
314    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
315        match self {
316            Self::Critical => write!(f, "CRITICAL"),
317            Self::High => write!(f, "HIGH"),
318            Self::Medium => write!(f, "MEDIUM"),
319            Self::Low => write!(f, "LOW"),
320        }
321    }
322}
323
324/// A single index recommendation
325#[derive(Debug, Clone)]
326pub struct IndexRecommendation {
327    /// Recommended index type
328    pub index_type: IndexType,
329    /// Priority level
330    pub priority: RecommendationPriority,
331    /// Estimated benefit score (0.0 - 1.0)
332    pub benefit_score: f64,
333    /// Estimated performance improvement percentage
334    pub estimated_improvement_percent: f64,
335    /// Patterns that would benefit
336    pub benefiting_patterns: Vec<AccessPattern>,
337    /// Total query frequency affected
338    pub total_frequency: usize,
339    /// Rationale for the recommendation
340    pub rationale: String,
341    /// Estimated storage overhead
342    pub storage_overhead_percent: f64,
343    /// Estimated write performance impact
344    pub write_impact_percent: f64,
345}
346
347impl IndexRecommendation {
348    /// Get a human-readable summary
349    pub fn summary(&self) -> String {
350        format!(
351            "[{}] {} - {:.1}% improvement, affects {} queries",
352            self.priority,
353            self.index_type,
354            self.estimated_improvement_percent,
355            self.total_frequency
356        )
357    }
358}
359
360/// Current index configuration
361#[derive(Debug, Clone, Default)]
362pub struct IndexConfiguration {
363    /// Active indexes
364    pub active_indexes: HashSet<IndexType>,
365    /// Index usage statistics
366    pub usage_stats: HashMap<IndexType, IndexUsageStats>,
367}
368
369impl IndexConfiguration {
370    /// Create a default configuration (SPO only)
371    pub fn default_spo() -> Self {
372        let mut config = Self::default();
373        config.active_indexes.insert(IndexType::SPO);
374        config
375    }
376
377    /// Check if an index is active
378    pub fn has_index(&self, index_type: IndexType) -> bool {
379        self.active_indexes.contains(&index_type)
380    }
381
382    /// Add an index
383    pub fn add_index(&mut self, index_type: IndexType) {
384        self.active_indexes.insert(index_type);
385    }
386
387    /// Remove an index
388    pub fn remove_index(&mut self, index_type: IndexType) -> bool {
389        self.active_indexes.remove(&index_type)
390    }
391}
392
393/// Usage statistics for an index
394#[derive(Debug, Clone, Default)]
395pub struct IndexUsageStats {
396    /// Number of queries that used this index
397    pub query_count: usize,
398    /// Number of index lookups
399    pub lookup_count: usize,
400    /// Total rows scanned
401    pub rows_scanned: usize,
402    /// Total rows returned
403    pub rows_returned: usize,
404    /// Cache hit ratio
405    pub cache_hit_ratio: f64,
406}
407
408/// Analysis report for index optimization
409#[derive(Debug, Clone)]
410pub struct IndexAnalysisReport {
411    /// Generated timestamp
412    pub generated_at: SystemTime,
413    /// Current index configuration
414    pub current_config: IndexConfiguration,
415    /// Recommendations
416    pub recommendations: Vec<IndexRecommendation>,
417    /// Unused indexes that could be removed
418    pub unused_indexes: Vec<IndexType>,
419    /// Overlapping index pairs
420    pub overlapping_indexes: Vec<(IndexType, IndexType)>,
421    /// Summary statistics
422    pub summary: AnalysisSummary,
423}
424
425impl IndexAnalysisReport {
426    /// Get high priority recommendations
427    pub fn high_priority(&self) -> Vec<&IndexRecommendation> {
428        self.recommendations
429            .iter()
430            .filter(|r| r.priority <= RecommendationPriority::High)
431            .collect()
432    }
433
434    /// Check if any changes are recommended
435    pub fn has_recommendations(&self) -> bool {
436        !self.recommendations.is_empty() || !self.unused_indexes.is_empty()
437    }
438
439    /// Generate a text summary
440    pub fn text_summary(&self) -> String {
441        let mut text = String::from("Index Analysis Report\n");
442        text.push_str(&format!("Generated: {:?}\n\n", self.generated_at));
443
444        text.push_str(&format!(
445            "Current Indexes: {:?}\n",
446            self.current_config.active_indexes
447        ));
448        text.push_str(&format!(
449            "Patterns Analyzed: {}\n",
450            self.summary.total_patterns
451        ));
452        text.push_str(&format!(
453            "Total Queries: {}\n\n",
454            self.summary.total_queries
455        ));
456
457        if !self.recommendations.is_empty() {
458            text.push_str("Recommendations:\n");
459            for rec in &self.recommendations {
460                text.push_str(&format!("  - {}\n", rec.summary()));
461            }
462            text.push('\n');
463        }
464
465        if !self.unused_indexes.is_empty() {
466            text.push_str(&format!(
467                "Unused Indexes (consider removing): {:?}\n",
468                self.unused_indexes
469            ));
470        }
471
472        text
473    }
474}
475
476/// Summary statistics for analysis
477#[derive(Debug, Clone, Default)]
478pub struct AnalysisSummary {
479    /// Total patterns analyzed
480    pub total_patterns: usize,
481    /// Total queries analyzed
482    pub total_queries: usize,
483    /// Average selectivity
484    pub avg_selectivity: f64,
485    /// Most frequent pattern signature
486    pub most_frequent_pattern: Option<String>,
487    /// Estimated overall improvement if all recommendations adopted
488    pub potential_improvement_percent: f64,
489}
490
491/// Main index advisor
492#[derive(Debug)]
493pub struct IndexAdvisor {
494    /// Configuration
495    config: AdvisorConfig,
496    /// Current index configuration
497    current_indexes: IndexConfiguration,
498    /// Tracked query patterns
499    patterns: HashMap<String, QueryPattern>,
500    /// Statistics
501    stats: AdvisorStatistics,
502}
503
504/// Statistics for the advisor
505#[derive(Debug, Clone, Default)]
506pub struct AdvisorStatistics {
507    /// Total queries recorded
508    pub total_queries: usize,
509    /// Total patterns discovered
510    pub total_patterns: usize,
511    /// Analysis count
512    pub analyses_performed: usize,
513    /// Recommendations generated
514    pub recommendations_generated: usize,
515}
516
517impl IndexAdvisor {
518    /// Create a new index advisor
519    pub fn new(config: AdvisorConfig) -> Self {
520        Self {
521            config,
522            current_indexes: IndexConfiguration::default_spo(),
523            patterns: HashMap::new(),
524            stats: AdvisorStatistics::default(),
525        }
526    }
527
528    /// Create with default configuration
529    pub fn with_defaults() -> Self {
530        Self::new(AdvisorConfig::default())
531    }
532
533    /// Set the current index configuration
534    pub fn set_indexes(&mut self, indexes: IndexConfiguration) {
535        self.current_indexes = indexes;
536    }
537
538    /// Record a query pattern
539    pub fn record_pattern(
540        &mut self,
541        access_pattern: AccessPattern,
542        selectivity: f64,
543        execution_time_ms: f64,
544    ) {
545        let signature = access_pattern.signature();
546        self.stats.total_queries += 1;
547
548        if let Some(pattern) = self.patterns.get_mut(&signature) {
549            pattern.update(selectivity, execution_time_ms);
550        } else {
551            let mut pattern = QueryPattern::new(access_pattern);
552            pattern.avg_selectivity = selectivity;
553            pattern.avg_execution_time_ms = execution_time_ms;
554            self.patterns.insert(signature, pattern);
555            self.stats.total_patterns += 1;
556        }
557
558        // Evict old patterns if needed
559        if self.patterns.len() > self.config.max_tracked_patterns {
560            self.evict_old_patterns();
561        }
562    }
563
564    /// Record a simple query (parses pattern from SPARQL-like syntax)
565    pub fn record_query(&mut self, query: &str) {
566        // Simple pattern extraction - look for triple patterns
567        let patterns = self.extract_patterns(query);
568        for pattern in patterns {
569            self.record_pattern(pattern, 1.0, 0.0);
570        }
571    }
572
573    /// Analyze workload and generate recommendations
574    pub fn analyze(&mut self) -> IndexAnalysisReport {
575        self.stats.analyses_performed += 1;
576
577        let mut recommendations = Vec::new();
578        let mut index_benefits: HashMap<IndexType, IndexBenefitAccumulator> = HashMap::new();
579
580        // Analyze each pattern
581        for pattern in self.patterns.values() {
582            if pattern.frequency < self.config.min_pattern_frequency {
583                continue;
584            }
585
586            // Find best index for this pattern
587            if let Some(best_index) = pattern.access_pattern.best_index() {
588                // Check if we already have this index
589                if !self.current_indexes.has_index(best_index) {
590                    let entry = index_benefits.entry(best_index).or_default();
591                    entry.add_pattern(pattern);
592                }
593            }
594        }
595
596        // Convert benefits to recommendations
597        for (index_type, benefits) in index_benefits {
598            let benefit_score = self.calculate_benefit_score(&benefits);
599            if benefit_score >= self.config.min_benefit_score {
600                let priority = self.determine_priority(benefit_score);
601                let estimated_improvement = self.estimate_improvement(&benefits);
602
603                recommendations.push(IndexRecommendation {
604                    index_type,
605                    priority,
606                    benefit_score,
607                    estimated_improvement_percent: estimated_improvement,
608                    benefiting_patterns: benefits.patterns.clone(),
609                    total_frequency: benefits.total_frequency,
610                    rationale: self.generate_rationale(&benefits, index_type),
611                    storage_overhead_percent: self.estimate_storage_overhead(index_type),
612                    write_impact_percent: self.estimate_write_impact(index_type),
613                });
614            }
615        }
616
617        // Sort by priority and benefit
618        recommendations.sort_by(|a, b| match a.priority.cmp(&b.priority) {
619            std::cmp::Ordering::Equal => b
620                .benefit_score
621                .partial_cmp(&a.benefit_score)
622                .unwrap_or(std::cmp::Ordering::Equal),
623            other => other,
624        });
625
626        // Limit recommendations
627        recommendations.truncate(self.config.max_recommendations);
628        self.stats.recommendations_generated += recommendations.len();
629
630        // Find unused indexes
631        let unused_indexes = self.find_unused_indexes();
632
633        // Find overlapping indexes
634        let overlapping_indexes = self.find_overlapping_indexes();
635
636        // Generate summary
637        let summary = self.generate_summary(&recommendations);
638
639        IndexAnalysisReport {
640            generated_at: SystemTime::now(),
641            current_config: self.current_indexes.clone(),
642            recommendations,
643            unused_indexes,
644            overlapping_indexes,
645            summary,
646        }
647    }
648
649    /// Get current statistics
650    pub fn statistics(&self) -> &AdvisorStatistics {
651        &self.stats
652    }
653
654    /// Get current configuration
655    pub fn config(&self) -> &AdvisorConfig {
656        &self.config
657    }
658
659    /// Clear all tracked patterns
660    pub fn clear_patterns(&mut self) {
661        self.patterns.clear();
662        self.stats.total_patterns = 0;
663    }
664
665    /// Export tracked patterns for persistence
666    pub fn export_patterns(&self) -> Vec<(String, QueryPattern)> {
667        self.patterns
668            .iter()
669            .map(|(k, v)| (k.clone(), v.clone()))
670            .collect()
671    }
672
673    /// Import patterns
674    pub fn import_patterns(&mut self, patterns: Vec<(String, QueryPattern)>) {
675        for (sig, pattern) in patterns {
676            self.patterns.insert(sig, pattern);
677            self.stats.total_patterns += 1;
678        }
679    }
680
681    // Private methods
682
683    fn extract_patterns(&self, query: &str) -> Vec<AccessPattern> {
684        let mut patterns = Vec::new();
685        let query_lower = query.to_lowercase();
686
687        // Look for triple pattern-like structures: ?var :pred ?var or :subj :pred ?var etc.
688        // Simple heuristic parsing - not full SPARQL parsing
689        for part in query_lower.split('{') {
690            for segment in part.split('.') {
691                let segment = segment.trim();
692                if segment.is_empty() || segment.starts_with('}') {
693                    continue;
694                }
695
696                let tokens: Vec<&str> = segment.split_whitespace().collect();
697                if tokens.len() >= 3 {
698                    let has_subject = !tokens[0].starts_with('?');
699                    let has_predicate = !tokens[1].starts_with('?');
700                    let has_object = !tokens[2].starts_with('?');
701
702                    let mut pattern = AccessPattern::new(has_subject, has_predicate, has_object);
703                    if has_predicate {
704                        pattern.predicate_value = Some(tokens[1].to_string());
705                    }
706                    patterns.push(pattern);
707                }
708            }
709        }
710
711        if patterns.is_empty() {
712            // Default to full scan pattern if no patterns found
713            patterns.push(AccessPattern::new(false, false, false));
714        }
715
716        patterns
717    }
718
719    fn calculate_benefit_score(&self, benefits: &IndexBenefitAccumulator) -> f64 {
720        let freq_score = (benefits.total_frequency as f64).ln().max(0.0) / 10.0;
721        let time_score = benefits.total_execution_time_ms / 1000.0;
722
723        let raw_score = self.config.frequency_weight * freq_score
724            + self.config.execution_time_weight * time_score;
725
726        // Normalize to 0.0 - 1.0
727        (raw_score / 2.0).min(1.0)
728    }
729
730    fn determine_priority(&self, benefit_score: f64) -> RecommendationPriority {
731        if benefit_score >= 0.8 {
732            RecommendationPriority::Critical
733        } else if benefit_score >= 0.5 {
734            RecommendationPriority::High
735        } else if benefit_score >= 0.3 {
736            RecommendationPriority::Medium
737        } else {
738            RecommendationPriority::Low
739        }
740    }
741
742    fn estimate_improvement(&self, benefits: &IndexBenefitAccumulator) -> f64 {
743        // Estimate based on selectivity improvement
744        let avg_selectivity = if benefits.patterns.is_empty() {
745            1.0
746        } else {
747            benefits.patterns.len() as f64 / self.patterns.len() as f64
748        };
749
750        // Indexes typically improve performance by 10-90% depending on selectivity
751        let base_improvement = 50.0 * (1.0 - avg_selectivity);
752        base_improvement.clamp(10.0, 90.0)
753    }
754
755    fn generate_rationale(
756        &self,
757        benefits: &IndexBenefitAccumulator,
758        index_type: IndexType,
759    ) -> String {
760        format!(
761            "Index {} would benefit {} pattern(s) with total frequency {} queries. \
762             Primary optimization for {} lookups.",
763            index_type,
764            benefits.patterns.len(),
765            benefits.total_frequency,
766            index_type.primary_component()
767        )
768    }
769
770    fn estimate_storage_overhead(&self, _index_type: IndexType) -> f64 {
771        // Each index typically adds ~100% storage overhead for the indexed data
772        // This is a simplified estimate
773        100.0
774    }
775
776    fn estimate_write_impact(&self, _index_type: IndexType) -> f64 {
777        // Each additional index typically adds ~20% write overhead
778        20.0
779    }
780
781    fn find_unused_indexes(&self) -> Vec<IndexType> {
782        let mut unused = Vec::new();
783        for &index_type in &self.current_indexes.active_indexes {
784            let is_used = self.patterns.values().any(|p| {
785                p.frequency >= self.config.min_pattern_frequency
786                    && index_type.covers_pattern(&p.access_pattern)
787            });
788            if !is_used && index_type != IndexType::SPO {
789                // Never recommend removing SPO as it's the default
790                unused.push(index_type);
791            }
792        }
793        unused
794    }
795
796    fn find_overlapping_indexes(&self) -> Vec<(IndexType, IndexType)> {
797        let mut overlaps = Vec::new();
798        let indexes: Vec<_> = self.current_indexes.active_indexes.iter().collect();
799
800        for i in 0..indexes.len() {
801            for j in (i + 1)..indexes.len() {
802                if indexes[i].primary_component() == indexes[j].primary_component() {
803                    overlaps.push((*indexes[i], *indexes[j]));
804                }
805            }
806        }
807        overlaps
808    }
809
810    fn generate_summary(&self, recommendations: &[IndexRecommendation]) -> AnalysisSummary {
811        let total_patterns = self.patterns.len();
812        let total_queries: usize = self.patterns.values().map(|p| p.frequency).sum();
813
814        let avg_selectivity = if total_patterns > 0 {
815            self.patterns
816                .values()
817                .map(|p| p.avg_selectivity)
818                .sum::<f64>()
819                / total_patterns as f64
820        } else {
821            1.0
822        };
823
824        let most_frequent_pattern = self
825            .patterns
826            .iter()
827            .max_by_key(|(_, p)| p.frequency)
828            .map(|(sig, _)| sig.clone());
829
830        let potential_improvement = recommendations
831            .iter()
832            .map(|r| r.estimated_improvement_percent)
833            .sum::<f64>()
834            .min(95.0); // Cap at 95%
835
836        AnalysisSummary {
837            total_patterns,
838            total_queries,
839            avg_selectivity,
840            most_frequent_pattern,
841            potential_improvement_percent: potential_improvement,
842        }
843    }
844
845    fn evict_old_patterns(&mut self) {
846        // Sort by last seen and keep most recent
847        let mut sorted: Vec<_> = self.patterns.iter().collect();
848        sorted.sort_by(|a, b| b.1.last_seen.cmp(&a.1.last_seen));
849
850        let to_keep: HashSet<_> = sorted
851            .iter()
852            .take(self.config.max_tracked_patterns / 2)
853            .map(|(k, _)| (*k).clone())
854            .collect();
855
856        self.patterns.retain(|k, _| to_keep.contains(k));
857        self.stats.total_patterns = self.patterns.len();
858    }
859}
860
861/// Accumulator for index benefit calculation
862#[derive(Debug, Default)]
863struct IndexBenefitAccumulator {
864    patterns: Vec<AccessPattern>,
865    total_frequency: usize,
866    total_execution_time_ms: f64,
867}
868
869impl IndexBenefitAccumulator {
870    fn add_pattern(&mut self, pattern: &QueryPattern) {
871        self.patterns.push(pattern.access_pattern.clone());
872        self.total_frequency += pattern.frequency;
873        self.total_execution_time_ms += pattern.avg_execution_time_ms * pattern.frequency as f64;
874    }
875}
876
877#[cfg(test)]
878mod tests {
879    use super::*;
880
881    #[test]
882    fn test_access_pattern_signature() {
883        let pattern = AccessPattern::new(true, true, false);
884        assert_eq!(pattern.signature(), "SP?");
885
886        let pattern = AccessPattern::new(false, true, true);
887        assert_eq!(pattern.signature(), "?PO");
888
889        let pattern = AccessPattern::new(false, false, false);
890        assert_eq!(pattern.signature(), "???");
891    }
892
893    #[test]
894    fn test_access_pattern_best_index() {
895        assert_eq!(
896            AccessPattern::new(true, true, false).best_index(),
897            Some(IndexType::SPO)
898        );
899        assert_eq!(
900            AccessPattern::new(true, false, true).best_index(),
901            Some(IndexType::SOP)
902        );
903        assert_eq!(
904            AccessPattern::new(false, true, true).best_index(),
905            Some(IndexType::POS)
906        );
907        assert_eq!(
908            AccessPattern::new(false, false, true).best_index(),
909            Some(IndexType::OSP)
910        );
911        assert_eq!(AccessPattern::new(false, false, false).best_index(), None);
912    }
913
914    #[test]
915    fn test_index_type_covers_pattern() {
916        let pattern = AccessPattern::new(true, true, false);
917        assert!(IndexType::SPO.covers_pattern(&pattern));
918        assert!(IndexType::PSO.covers_pattern(&pattern));
919    }
920
921    #[test]
922    fn test_query_pattern_update() {
923        let mut pattern = QueryPattern::new(AccessPattern::new(true, false, false));
924        assert_eq!(pattern.frequency, 1);
925
926        pattern.update(0.5, 10.0);
927        assert_eq!(pattern.frequency, 2);
928
929        pattern.update(0.3, 20.0);
930        assert_eq!(pattern.frequency, 3);
931    }
932
933    #[test]
934    fn test_index_advisor_creation() {
935        let advisor = IndexAdvisor::with_defaults();
936        assert!(advisor.current_indexes.has_index(IndexType::SPO));
937    }
938
939    #[test]
940    fn test_record_pattern() {
941        let mut advisor = IndexAdvisor::with_defaults();
942
943        advisor.record_pattern(AccessPattern::new(true, true, false), 0.1, 5.0);
944        advisor.record_pattern(AccessPattern::new(true, true, false), 0.2, 6.0);
945
946        assert_eq!(advisor.stats.total_queries, 2);
947        assert_eq!(advisor.stats.total_patterns, 1);
948    }
949
950    #[test]
951    fn test_record_query() {
952        let mut advisor = IndexAdvisor::with_defaults();
953
954        advisor.record_query("SELECT * WHERE { ?s :knows ?o }");
955        assert!(advisor.stats.total_queries > 0);
956    }
957
958    #[test]
959    fn test_analyze_with_patterns() {
960        let config = AdvisorConfig {
961            min_pattern_frequency: 2,
962            min_benefit_score: 0.01, // Lower threshold for test
963            ..Default::default()
964        };
965        let mut advisor = IndexAdvisor::new(config);
966
967        // Record patterns that would benefit from POS index (not covered by SPO)
968        for _ in 0..50 {
969            advisor.record_pattern(AccessPattern::new(false, true, true), 0.1, 100.0);
970        }
971
972        let report = advisor.analyze();
973        // May or may not have recommendations depending on benefit calculation
974        // Just verify analysis completes successfully
975        assert!(report.summary.total_patterns > 0);
976        assert!(report.summary.total_queries > 0);
977    }
978
979    #[test]
980    fn test_analyze_empty() {
981        let mut advisor = IndexAdvisor::with_defaults();
982        let report = advisor.analyze();
983        assert!(report.recommendations.is_empty());
984    }
985
986    #[test]
987    fn test_recommendation_priority() {
988        let config = AdvisorConfig {
989            min_pattern_frequency: 1,
990            ..Default::default()
991        };
992        let mut advisor = IndexAdvisor::new(config);
993
994        // High frequency pattern
995        for _ in 0..100 {
996            advisor.record_pattern(AccessPattern::new(false, false, true), 0.01, 100.0);
997        }
998
999        let report = advisor.analyze();
1000        if !report.recommendations.is_empty() {
1001            // Should have high priority due to frequency
1002            assert!(report.recommendations[0].priority <= RecommendationPriority::High);
1003        }
1004    }
1005
1006    #[test]
1007    fn test_index_configuration() {
1008        let mut config = IndexConfiguration::default_spo();
1009        assert!(config.has_index(IndexType::SPO));
1010        assert!(!config.has_index(IndexType::POS));
1011
1012        config.add_index(IndexType::POS);
1013        assert!(config.has_index(IndexType::POS));
1014
1015        config.remove_index(IndexType::POS);
1016        assert!(!config.has_index(IndexType::POS));
1017    }
1018
1019    #[test]
1020    fn test_unused_index_detection() {
1021        let mut advisor = IndexAdvisor::with_defaults();
1022        advisor.current_indexes.add_index(IndexType::OSP);
1023
1024        // Only record SPO-compatible patterns
1025        for _ in 0..10 {
1026            advisor.record_pattern(AccessPattern::new(true, true, false), 0.1, 5.0);
1027        }
1028
1029        let report = advisor.analyze();
1030        assert!(report.unused_indexes.contains(&IndexType::OSP));
1031    }
1032
1033    #[test]
1034    fn test_export_import_patterns() {
1035        let mut advisor = IndexAdvisor::with_defaults();
1036        advisor.record_pattern(AccessPattern::new(true, false, false), 0.5, 10.0);
1037        advisor.record_pattern(AccessPattern::new(false, true, false), 0.3, 15.0);
1038
1039        let exported = advisor.export_patterns();
1040        assert_eq!(exported.len(), 2);
1041
1042        let mut new_advisor = IndexAdvisor::with_defaults();
1043        new_advisor.import_patterns(exported);
1044        assert_eq!(new_advisor.stats.total_patterns, 2);
1045    }
1046
1047    #[test]
1048    fn test_config_presets() {
1049        let conservative = AdvisorConfig::conservative();
1050        assert_eq!(conservative.min_pattern_frequency, 10);
1051
1052        let aggressive = AdvisorConfig::aggressive();
1053        assert_eq!(aggressive.min_pattern_frequency, 3);
1054    }
1055
1056    #[test]
1057    fn test_report_text_summary() {
1058        let config = AdvisorConfig {
1059            min_pattern_frequency: 1,
1060            ..Default::default()
1061        };
1062        let mut advisor = IndexAdvisor::new(config);
1063
1064        for _ in 0..5 {
1065            advisor.record_pattern(AccessPattern::new(false, true, true), 0.1, 10.0);
1066        }
1067
1068        let report = advisor.analyze();
1069        let summary = report.text_summary();
1070
1071        assert!(summary.contains("Index Analysis Report"));
1072        assert!(summary.contains("Current Indexes"));
1073    }
1074
1075    #[test]
1076    fn test_pattern_bound_count() {
1077        assert_eq!(AccessPattern::new(true, true, true).bound_count(), 3);
1078        assert_eq!(AccessPattern::new(true, false, false).bound_count(), 1);
1079        assert_eq!(AccessPattern::new(false, false, false).bound_count(), 0);
1080    }
1081
1082    #[test]
1083    fn test_index_type_display() {
1084        assert_eq!(format!("{}", IndexType::SPO), "SPO");
1085        assert_eq!(format!("{}", IndexType::POS), "POS");
1086    }
1087}