Skip to main content

oxirs_arq/
jit_compiler.rs

1//! JIT Compilation for SPARQL Queries
2//!
3//! This module provides Just-In-Time compilation for SPARQL queries, transforming
4//! high-level SPARQL algebra into optimized execution plans that can be compiled
5//! to native code or bytecode for improved performance.
6//!
7//! # Features
8//!
9//! - **Query Plan Compilation**: Transform SPARQL algebra into optimized execution plans
10//! - **Code Generation**: Generate specialized code for query patterns
11//! - **Plan Caching**: Reuse compiled plans with intelligent invalidation
12//! - **Adaptive Optimization**: Runtime profiling and re-compilation
13//! - **Performance Tracking**: Measure compilation and execution metrics
14//!
15//! # Architecture
16//!
17//! ```text
18//! SPARQL Query → Algebra → Plan Generation → Code Gen → Specialized Executor
19//!                   ↓           ↓              ↓              ↓
20//!              Optimization  Lowering    Specialization  Execution
21//! ```
22//!
23//! # Example
24//!
25//! ```rust,ignore
26//! use oxirs_arq::jit_compiler::{QueryJitCompiler, JitCompilerConfig};
27//!
28//! let config = JitCompilerConfig::default();
29//! let mut compiler = QueryJitCompiler::new(config)?;
30//!
31//! // Compile a SPARQL algebra
32//! let compiled = compiler.compile(&algebra)?;
33//!
34//! // Execute compiled plan
35//! let results = compiled.execute(&dataset)?;
36//! ```
37
38use crate::algebra::Algebra;
39use crate::cardinality_estimator::{CardinalityEstimator, EstimatorConfig};
40use anyhow::Result;
41use dashmap::DashMap;
42use parking_lot::RwLock;
43use scirs2_core::metrics::MetricsRegistry;
44use scirs2_core::profiling::Profiler;
45use serde::{Deserialize, Serialize};
46use std::fmt;
47use std::sync::Arc;
48use std::time::{Duration, Instant};
49use tracing::{debug, info, warn};
50
51/// Configuration for SPARQL query JIT compilation
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct JitCompilerConfig {
54    /// Enable JIT compilation
55    pub enabled: bool,
56
57    /// Optimization level (0-3)
58    pub optimization_level: usize,
59
60    /// Enable query plan caching
61    pub enable_caching: bool,
62
63    /// Maximum cache size (in bytes)
64    pub max_cache_size: usize,
65
66    /// Cache TTL (time-to-live)
67    pub cache_ttl: Duration,
68
69    /// Enable adaptive optimization
70    pub adaptive_optimization: bool,
71
72    /// Minimum execution count before re-optimization
73    pub min_executions_for_reopt: usize,
74
75    /// Compilation timeout
76    pub compilation_timeout: Duration,
77
78    /// Enable performance profiling
79    pub enable_profiling: bool,
80
81    /// Enable specialized code generation
82    pub enable_specialization: bool,
83
84    /// Maximum plan complexity for compilation
85    pub max_plan_complexity: usize,
86}
87
88impl Default for JitCompilerConfig {
89    fn default() -> Self {
90        Self {
91            enabled: true,
92            optimization_level: 2,
93            enable_caching: true,
94            max_cache_size: 512 * 1024 * 1024,    // 512MB
95            cache_ttl: Duration::from_secs(3600), // 1 hour
96            adaptive_optimization: true,
97            min_executions_for_reopt: 10,
98            compilation_timeout: Duration::from_secs(30),
99            enable_profiling: true,
100            enable_specialization: true,
101            max_plan_complexity: 1000,
102        }
103    }
104}
105
106/// Compiled SPARQL query plan ready for execution
107#[derive(Clone)]
108pub struct CompiledQuery {
109    /// Unique query identifier
110    pub id: String,
111
112    /// Original SPARQL algebra
113    pub algebra: Arc<Algebra>,
114
115    /// Compiled execution plan
116    pub plan: Arc<ExecutionPlan>,
117
118    /// Compilation timestamp
119    pub compiled_at: Instant,
120
121    /// Execution statistics
122    pub stats: Arc<RwLock<ExecutionStats>>,
123
124    /// Optimization metadata
125    pub metadata: QueryMetadata,
126}
127
128impl fmt::Debug for CompiledQuery {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        f.debug_struct("CompiledQuery")
131            .field("id", &self.id)
132            .field("compiled_at", &self.compiled_at.elapsed())
133            .field("stats", &self.stats)
134            .field("metadata", &self.metadata)
135            .finish()
136    }
137}
138
139/// Execution plan generated from SPARQL algebra
140#[derive(Debug, Clone, Serialize, Deserialize)]
141pub struct ExecutionPlan {
142    /// Plan operations
143    pub operations: Vec<PlanOperation>,
144
145    /// Estimated cost
146    pub estimated_cost: f64,
147
148    /// Estimated memory usage (bytes)
149    pub estimated_memory: usize,
150
151    /// Optimization hints applied
152    pub optimization_hints: Vec<String>,
153
154    /// Specialization metadata
155    pub specializations: Vec<Specialization>,
156}
157
158/// Individual operation in execution plan
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub enum PlanOperation {
161    /// Scan triple patterns (specialized for pattern type)
162    ScanTriples {
163        pattern_id: usize,
164        pattern_type: PatternType,
165        estimated_cardinality: usize,
166    },
167
168    /// Hash join operation (with strategy)
169    HashJoin {
170        left_id: usize,
171        right_id: usize,
172        join_variables: Vec<String>,
173        strategy: JitJoinStrategy,
174    },
175
176    /// Nested loop join (for small cardinalities)
177    NestedLoopJoin {
178        left_id: usize,
179        right_id: usize,
180        join_variables: Vec<String>,
181    },
182
183    /// Filter operation (with specialization)
184    Filter {
185        expr_id: usize,
186        filter_type: FilterType,
187    },
188
189    /// Project variables
190    Project { variables: Vec<String> },
191
192    /// Sort operation
193    Sort {
194        variables: Vec<String>,
195        ascending: Vec<bool>,
196    },
197
198    /// Limit operation
199    Limit { limit: usize },
200
201    /// Offset operation
202    Offset { offset: usize },
203
204    /// Distinct operation
205    Distinct,
206
207    /// Union operation
208    Union { branches: Vec<usize> },
209
210    /// Optional (left join) operation
211    Optional { left_id: usize, right_id: usize },
212}
213
214/// Pattern types for specialization
215#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
216pub enum PatternType {
217    /// All variables (?s ?p ?o)
218    AllVariables,
219
220    /// Subject bound (s ?p ?o)
221    SubjectBound,
222
223    /// Predicate bound (?s p ?o)
224    PredicateBound,
225
226    /// Object bound (?s ?p o)
227    ObjectBound,
228
229    /// Subject-Predicate bound (s p ?o)
230    SubjectPredicateBound,
231
232    /// Subject-Object bound (s ?p o)
233    SubjectObjectBound,
234
235    /// Predicate-Object bound (?s p o)
236    PredicateObjectBound,
237
238    /// Fully bound (s p o)
239    FullyBound,
240}
241
242/// Join strategies for JIT compilation
243#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
244pub enum JitJoinStrategy {
245    /// Hash join (default for large inputs)
246    Hash,
247
248    /// Sort-merge join
249    SortMerge,
250
251    /// Index nested loop join
252    IndexNestedLoop,
253
254    /// Bind join (for federated queries)
255    Bind,
256}
257
258/// Filter types for optimization
259#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
260pub enum FilterType {
261    /// Simple equality filter
262    Equality,
263
264    /// Numeric comparison
265    NumericComparison,
266
267    /// String operation
268    StringOperation,
269
270    /// Regex filter
271    Regex,
272
273    /// Boolean logic
274    BooleanLogic,
275
276    /// Complex expression
277    Complex,
278}
279
280/// Specialization applied to the plan
281#[derive(Debug, Clone, Serialize, Deserialize)]
282pub struct Specialization {
283    /// Specialization type
284    pub spec_type: SpecializationType,
285
286    /// Description
287    pub description: String,
288
289    /// Expected speedup factor
290    pub speedup_factor: f64,
291}
292
293/// Types of specializations
294#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
295pub enum SpecializationType {
296    /// Pattern-specific scanning
297    PatternScanning,
298
299    /// Join strategy selection
300    JoinStrategy,
301
302    /// Filter pushdown
303    FilterPushdown,
304
305    /// Index usage
306    IndexUsage,
307
308    /// SIMD vectorization
309    SimdVectorization,
310
311    /// Parallel execution
312    ParallelExecution,
313}
314
315/// Query execution statistics
316#[derive(Debug, Default, Clone, Serialize, Deserialize)]
317pub struct ExecutionStats {
318    /// Number of executions
319    pub execution_count: usize,
320
321    /// Total execution time
322    pub total_execution_time: Duration,
323
324    /// Average execution time
325    pub avg_execution_time: Duration,
326
327    /// Minimum execution time
328    pub min_execution_time: Option<Duration>,
329
330    /// Maximum execution time
331    pub max_execution_time: Option<Duration>,
332
333    /// Total results produced
334    pub total_results: usize,
335
336    /// Average results per execution
337    pub avg_results: f64,
338
339    /// Last execution timestamp (excluded from serialization)
340    #[serde(skip)]
341    pub last_executed: Option<Instant>,
342
343    /// Compilation time
344    pub compilation_time: Duration,
345
346    /// Re-optimization count
347    pub reoptimization_count: usize,
348}
349
350impl ExecutionStats {
351    /// Update statistics with a new execution
352    pub fn record_execution(&mut self, duration: Duration, result_count: usize) {
353        self.execution_count += 1;
354        self.total_execution_time += duration;
355        self.avg_execution_time = self.total_execution_time / self.execution_count as u32;
356        self.total_results += result_count;
357        self.avg_results = self.total_results as f64 / self.execution_count as f64;
358        self.last_executed = Some(Instant::now());
359
360        self.min_execution_time = Some(
361            self.min_execution_time
362                .map_or(duration, |min| min.min(duration)),
363        );
364        self.max_execution_time = Some(
365            self.max_execution_time
366                .map_or(duration, |max| max.max(duration)),
367        );
368    }
369
370    /// Check if re-optimization is beneficial
371    pub fn should_reoptimize(&self, min_executions: usize) -> bool {
372        self.execution_count >= min_executions
373            && self.avg_execution_time > Duration::from_millis(100)
374    }
375}
376
377/// Query compilation metadata
378#[derive(Debug, Clone, Default, Serialize, Deserialize)]
379pub struct QueryMetadata {
380    /// Query complexity score (0-1000)
381    pub complexity: usize,
382
383    /// Estimated memory usage (bytes)
384    pub estimated_memory: usize,
385
386    /// Number of triple patterns
387    pub triple_pattern_count: usize,
388
389    /// Number of joins
390    pub join_count: usize,
391
392    /// Number of filters
393    pub filter_count: usize,
394
395    /// Has aggregation
396    pub has_aggregation: bool,
397
398    /// Has optional patterns
399    pub has_optional: bool,
400
401    /// Has union patterns
402    pub has_union: bool,
403
404    /// Optimization opportunities
405    pub optimization_opportunities: Vec<String>,
406}
407
408/// SPARQL Query JIT Compiler
409pub struct QueryJitCompiler {
410    /// Compiler configuration
411    config: JitCompilerConfig,
412
413    /// Compiled query cache
414    query_cache: Arc<DashMap<String, Arc<CompiledQuery>>>,
415
416    /// Metric registry (reserved for future use)
417    #[allow(dead_code)]
418    metrics: Arc<MetricsRegistry>,
419
420    /// Performance profiler (reserved for future use)
421    #[allow(dead_code)]
422    profiler: Arc<Profiler>,
423
424    /// Cardinality estimator for accurate query planning
425    cardinality_estimator: Arc<CardinalityEstimator>,
426
427    /// Random seed for cache eviction
428    _rng_seed: u64,
429
430    /// Compilation statistics
431    stats: Arc<RwLock<CompilerStats>>,
432}
433
434/// Compiler-wide statistics
435#[derive(Debug, Default, Clone, Serialize, Deserialize)]
436pub struct CompilerStats {
437    /// Total compilations
438    pub total_compilations: usize,
439
440    /// Total compilation time
441    pub total_compilation_time: Duration,
442
443    /// Cache hits
444    pub cache_hits: usize,
445
446    /// Cache misses
447    pub cache_misses: usize,
448
449    /// Cache evictions
450    pub cache_evictions: usize,
451
452    /// Failed compilations
453    pub failed_compilations: usize,
454
455    /// Average compilation time
456    pub avg_compilation_time: Duration,
457}
458
459impl QueryJitCompiler {
460    /// Create a new JIT compiler with the given configuration
461    pub fn new(config: JitCompilerConfig) -> Result<Self> {
462        // Initialize metrics
463        let metrics = Arc::new(MetricsRegistry::new());
464
465        // Initialize profiler
466        let profiler = Arc::new(Profiler::new());
467
468        // Initialize cardinality estimator with default configuration
469        let cardinality_estimator = Arc::new(CardinalityEstimator::new(EstimatorConfig::default()));
470
471        Ok(Self {
472            config,
473            query_cache: Arc::new(DashMap::new()),
474            metrics,
475            profiler,
476            cardinality_estimator,
477            _rng_seed: 42,
478            stats: Arc::new(RwLock::new(CompilerStats::default())),
479        })
480    }
481
482    /// Compile a SPARQL algebra expression
483    pub fn compile(&mut self, algebra: &Algebra) -> Result<Arc<CompiledQuery>> {
484        let start_time = Instant::now();
485
486        // Generate query ID from algebra
487        let query_id = self.generate_query_id(algebra);
488
489        // Check cache first
490        if self.config.enable_caching {
491            if let Some(cached) = self.query_cache.get(&query_id) {
492                self.record_cache_hit();
493                debug!("JIT cache hit for query: {}", query_id);
494                return Ok(cached.clone());
495            }
496        }
497
498        self.record_cache_miss();
499        info!("Compiling query: {}", query_id);
500
501        // Analyze query to extract metadata
502        let metadata = self.analyze_query(algebra)?;
503
504        // Check complexity threshold
505        if metadata.complexity > self.config.max_plan_complexity {
506            warn!(
507                "Query complexity ({}) exceeds threshold ({}), using basic execution",
508                metadata.complexity, self.config.max_plan_complexity
509            );
510        }
511
512        // Generate execution plan
513        let plan = self.generate_execution_plan(algebra, &metadata)?;
514
515        // Create compiled query
516        let compiled = Arc::new(CompiledQuery {
517            id: query_id.clone(),
518            algebra: Arc::new(algebra.clone()),
519            plan: Arc::new(plan),
520            compiled_at: Instant::now(),
521            stats: Arc::new(RwLock::new(ExecutionStats {
522                compilation_time: start_time.elapsed(),
523                ..Default::default()
524            })),
525            metadata,
526        });
527
528        // Update cache
529        if self.config.enable_caching {
530            self.insert_into_cache(query_id.clone(), compiled.clone())?;
531        }
532
533        // Record metrics
534        let compilation_time = start_time.elapsed();
535        self.record_compilation(compilation_time);
536
537        info!(
538            "Query compiled successfully in {:?}: {}",
539            compilation_time, query_id
540        );
541
542        Ok(compiled)
543    }
544
545    /// Generate a unique identifier for the query
546    fn generate_query_id(&self, algebra: &Algebra) -> String {
547        use std::collections::hash_map::DefaultHasher;
548        use std::hash::{Hash, Hasher};
549
550        let mut hasher = DefaultHasher::new();
551        format!("{:?}", algebra).hash(&mut hasher);
552        format!("query_{:x}", hasher.finish())
553    }
554
555    /// Analyze query to extract compilation metadata
556    fn analyze_query(&self, algebra: &Algebra) -> Result<QueryMetadata> {
557        let mut metadata = QueryMetadata::default();
558
559        // Recursively analyze algebra structure
560        self.analyze_algebra_recursive(algebra, &mut metadata);
561
562        // Calculate complexity score
563        metadata.complexity = self.calculate_complexity(&metadata);
564
565        // Estimate memory usage
566        metadata.estimated_memory = self.estimate_memory(&metadata);
567
568        // Identify optimization opportunities
569        metadata.optimization_opportunities = self.identify_optimizations(&metadata);
570
571        Ok(metadata)
572    }
573
574    /// Recursively analyze algebra structure
575    #[allow(clippy::only_used_in_recursion)]
576    fn analyze_algebra_recursive(&self, algebra: &Algebra, metadata: &mut QueryMetadata) {
577        match algebra {
578            Algebra::Bgp(patterns) => {
579                metadata.triple_pattern_count += patterns.len();
580            }
581            Algebra::Join { left, right } => {
582                metadata.join_count += 1;
583                self.analyze_algebra_recursive(left, metadata);
584                self.analyze_algebra_recursive(right, metadata);
585            }
586            Algebra::Filter { pattern, .. } => {
587                metadata.filter_count += 1;
588                self.analyze_algebra_recursive(pattern, metadata);
589            }
590            Algebra::LeftJoin { left, right, .. } => {
591                metadata.has_optional = true;
592                metadata.join_count += 1;
593                self.analyze_algebra_recursive(left, metadata);
594                self.analyze_algebra_recursive(right, metadata);
595            }
596            Algebra::Union { left, right } => {
597                metadata.has_union = true;
598                self.analyze_algebra_recursive(left, metadata);
599                self.analyze_algebra_recursive(right, metadata);
600            }
601            Algebra::Group { pattern, .. } => {
602                metadata.has_aggregation = true;
603                self.analyze_algebra_recursive(pattern, metadata);
604            }
605            Algebra::Project { pattern, .. }
606            | Algebra::Distinct { pattern }
607            | Algebra::Reduced { pattern }
608            | Algebra::OrderBy { pattern, .. } => {
609                self.analyze_algebra_recursive(pattern, metadata);
610            }
611            Algebra::Slice {
612                pattern,
613                offset: _,
614                limit: _,
615            } => {
616                self.analyze_algebra_recursive(pattern, metadata);
617            }
618            Algebra::Graph { graph: _, pattern } => {
619                self.analyze_algebra_recursive(pattern, metadata);
620            }
621            Algebra::Extend { pattern, .. } => {
622                self.analyze_algebra_recursive(pattern, metadata);
623            }
624            Algebra::Minus { left, right } => {
625                self.analyze_algebra_recursive(left, metadata);
626                self.analyze_algebra_recursive(right, metadata);
627            }
628            _ => {}
629        }
630    }
631
632    /// Calculate query complexity score
633    fn calculate_complexity(&self, metadata: &QueryMetadata) -> usize {
634        let mut score = 0;
635
636        // Base complexity from triple patterns
637        score += metadata.triple_pattern_count * 10;
638
639        // Join complexity (exponential growth)
640        score += metadata.join_count.pow(2) * 20;
641
642        // Filter complexity
643        score += metadata.filter_count * 15;
644
645        // Additional complexity for special features
646        if metadata.has_aggregation {
647            score += 50;
648        }
649        if metadata.has_optional {
650            score += 30;
651        }
652        if metadata.has_union {
653            score += 25;
654        }
655
656        score.min(1000) // Cap at 1000
657    }
658
659    /// Estimate memory usage for query execution
660    fn estimate_memory(&self, metadata: &QueryMetadata) -> usize {
661        let base_memory = 1024 * 1024; // 1MB base
662
663        // Memory per triple pattern
664        let pattern_memory = metadata.triple_pattern_count * 100 * 1024; // 100KB per pattern
665
666        // Memory per join (increases exponentially)
667        let join_memory = if metadata.join_count > 0 {
668            2_usize.pow(metadata.join_count as u32) * 50 * 1024 // 50KB * 2^joins
669        } else {
670            0
671        };
672
673        base_memory + pattern_memory + join_memory
674    }
675
676    /// Identify optimization opportunities
677    fn identify_optimizations(&self, metadata: &QueryMetadata) -> Vec<String> {
678        let mut opportunities = Vec::new();
679
680        if metadata.join_count > 2 {
681            opportunities.push("Consider join reordering".to_string());
682        }
683
684        if metadata.filter_count > 0 {
685            opportunities.push("Filter pushdown optimization".to_string());
686        }
687
688        if metadata.triple_pattern_count > 5 {
689            opportunities.push("Pattern specialization".to_string());
690        }
691
692        if metadata.has_aggregation {
693            opportunities.push("Streaming aggregation".to_string());
694        }
695
696        opportunities
697    }
698
699    /// Generate execution plan from algebra
700    fn generate_execution_plan(
701        &self,
702        algebra: &Algebra,
703        metadata: &QueryMetadata,
704    ) -> Result<ExecutionPlan> {
705        let mut operations = Vec::new();
706        let mut specializations = Vec::new();
707
708        // Lower algebra to operations
709        self.lower_to_operations(algebra, &mut operations)?;
710
711        // Apply optimizations based on configuration
712        if self.config.optimization_level >= 1 {
713            self.optimize_plan(&mut operations, &mut specializations)?;
714        }
715
716        // Calculate estimated cost
717        let estimated_cost = self.calculate_plan_cost(&operations);
718
719        Ok(ExecutionPlan {
720            operations,
721            estimated_cost,
722            estimated_memory: metadata.estimated_memory,
723            optimization_hints: metadata.optimization_opportunities.clone(),
724            specializations,
725        })
726    }
727
728    /// Lower algebra to executable operations
729    #[allow(clippy::ptr_arg)]
730    fn lower_to_operations(&self, algebra: &Algebra, ops: &mut Vec<PlanOperation>) -> Result<()> {
731        match algebra {
732            Algebra::Bgp(patterns) => {
733                // Basic graph pattern - scan operation
734                let pattern_type = self.determine_pattern_type(algebra);
735
736                // Estimate cardinality using CardinalityEstimator
737                let estimated_cardinality = if !patterns.is_empty() {
738                    // Use the first pattern for estimation
739                    // For multiple patterns, we could sum or average the estimates
740                    match self
741                        .cardinality_estimator
742                        .estimate_triple_pattern(&patterns[0])
743                    {
744                        Ok(cardinality) => cardinality,
745                        Err(e) => {
746                            warn!("Cardinality estimation failed: {}, using default", e);
747                            10_000 // Fallback to conservative default
748                        }
749                    }
750                } else {
751                    10_000 // Default for empty BGP
752                };
753
754                debug!(
755                    "BGP cardinality estimate: {} for {} patterns",
756                    estimated_cardinality,
757                    patterns.len()
758                );
759
760                ops.push(PlanOperation::ScanTriples {
761                    pattern_id: ops.len(),
762                    pattern_type,
763                    estimated_cardinality: estimated_cardinality.try_into().unwrap_or(10_000),
764                });
765            }
766            Algebra::Join { left, right } => {
767                let left_start = ops.len();
768                self.lower_to_operations(left, ops)?;
769
770                let right_start = ops.len();
771                self.lower_to_operations(right, ops)?;
772
773                // Extract join variables (intersection of left and right variables)
774                let left_vars = left.variables();
775                let right_vars = right.variables();
776                let join_variables: Vec<String> = left_vars
777                    .iter()
778                    .filter(|v| right_vars.contains(v))
779                    .map(|v| v.name().to_string())
780                    .collect();
781
782                ops.push(PlanOperation::HashJoin {
783                    left_id: left_start,
784                    right_id: right_start,
785                    join_variables,
786                    strategy: JitJoinStrategy::Hash,
787                });
788            }
789            Algebra::Filter { pattern, .. } => {
790                self.lower_to_operations(pattern, ops)?;
791
792                ops.push(PlanOperation::Filter {
793                    expr_id: ops.len(),
794                    filter_type: FilterType::Complex,
795                });
796            }
797            Algebra::Project { pattern, variables } => {
798                self.lower_to_operations(pattern, ops)?;
799
800                ops.push(PlanOperation::Project {
801                    variables: variables.iter().map(|v| v.name().to_string()).collect(),
802                });
803            }
804            Algebra::Distinct { pattern } => {
805                self.lower_to_operations(pattern, ops)?;
806                ops.push(PlanOperation::Distinct);
807            }
808            Algebra::Slice {
809                pattern,
810                offset,
811                limit,
812            } => {
813                self.lower_to_operations(pattern, ops)?;
814
815                if let Some(off) = offset {
816                    ops.push(PlanOperation::Offset { offset: *off });
817                }
818                if let Some(lim) = limit {
819                    ops.push(PlanOperation::Limit { limit: *lim });
820                }
821            }
822            _ => {
823                // For other types, use a basic scan operation
824                ops.push(PlanOperation::ScanTriples {
825                    pattern_id: ops.len(),
826                    pattern_type: PatternType::AllVariables,
827                    estimated_cardinality: 1000,
828                });
829            }
830        }
831
832        Ok(())
833    }
834
835    /// Determine the type of triple pattern for specialization
836    fn determine_pattern_type(&self, _algebra: &Algebra) -> PatternType {
837        // For now, return AllVariables - would need actual pattern analysis
838        PatternType::AllVariables
839    }
840
841    /// Optimize the execution plan
842    #[allow(clippy::ptr_arg)]
843    fn optimize_plan(
844        &self,
845        operations: &mut Vec<PlanOperation>,
846        specializations: &mut Vec<Specialization>,
847    ) -> Result<()> {
848        // Apply pattern-specific optimizations
849        for op in operations.iter_mut() {
850            if let PlanOperation::ScanTriples { pattern_type, .. } = op {
851                // Record specialization
852                specializations.push(Specialization {
853                    spec_type: SpecializationType::PatternScanning,
854                    description: format!("Specialized scan for pattern type: {:?}", pattern_type),
855                    speedup_factor: 1.5,
856                });
857            }
858        }
859
860        Ok(())
861    }
862
863    /// Calculate estimated cost of execution plan
864    fn calculate_plan_cost(&self, operations: &[PlanOperation]) -> f64 {
865        let mut total_cost = 0.0;
866
867        for op in operations {
868            total_cost += match op {
869                PlanOperation::ScanTriples {
870                    estimated_cardinality,
871                    ..
872                } => *estimated_cardinality as f64 * 0.1,
873                PlanOperation::HashJoin { .. } => 100.0,
874                PlanOperation::NestedLoopJoin { .. } => 500.0,
875                PlanOperation::Filter { .. } => 10.0,
876                PlanOperation::Project { .. } => 5.0,
877                PlanOperation::Sort { .. } => 200.0,
878                PlanOperation::Limit { .. } => 1.0,
879                PlanOperation::Offset { .. } => 1.0,
880                PlanOperation::Distinct => 150.0,
881                PlanOperation::Union { .. } => 50.0,
882                PlanOperation::Optional { .. } => 120.0,
883            };
884        }
885
886        total_cost
887    }
888
889    /// Insert compiled query into cache
890    fn insert_into_cache(&self, query_id: String, compiled: Arc<CompiledQuery>) -> Result<()> {
891        // Check cache size and evict if necessary
892        if self.query_cache.len() * 1024 * 1024 > self.config.max_cache_size {
893            self.evict_cache_entry()?;
894        }
895
896        self.query_cache.insert(query_id, compiled);
897
898        Ok(())
899    }
900
901    /// Evict a cache entry (random eviction for now)
902    fn evict_cache_entry(&self) -> Result<()> {
903        if let Some(key) = self.query_cache.iter().next().map(|e| e.key().clone()) {
904            self.query_cache.remove(&key);
905            self.stats.write().cache_evictions += 1;
906            debug!("Evicted cached query: {}", key);
907        }
908
909        Ok(())
910    }
911
912    /// Record a cache hit
913    fn record_cache_hit(&self) {
914        self.stats.write().cache_hits += 1;
915    }
916
917    /// Record a cache miss
918    fn record_cache_miss(&self) {
919        self.stats.write().cache_misses += 1;
920    }
921
922    /// Record a compilation
923    fn record_compilation(&self, duration: Duration) {
924        let mut stats = self.stats.write();
925        stats.total_compilations += 1;
926        stats.total_compilation_time += duration;
927        stats.avg_compilation_time = if stats.total_compilations > 0 {
928            stats.total_compilation_time / stats.total_compilations as u32
929        } else {
930            Duration::ZERO
931        };
932    }
933
934    /// Get compiler statistics
935    pub fn stats(&self) -> CompilerStats {
936        self.stats.read().clone()
937    }
938
939    /// Clear the query cache
940    pub fn clear_cache(&self) {
941        self.query_cache.clear();
942        info!("JIT query cache cleared");
943    }
944}
945
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a compiler built from the default configuration.
    fn compiler_with_defaults() -> QueryJitCompiler {
        QueryJitCompiler::new(JitCompilerConfig::default()).unwrap()
    }

    /// Construction with the default configuration must succeed.
    #[test]
    fn test_jit_compiler_creation() {
        let outcome = QueryJitCompiler::new(JitCompilerConfig::default());
        assert!(outcome.is_ok());
    }

    /// The complexity score of a moderately complex query is positive and at most 1000.
    #[test]
    fn test_complexity_calculation() {
        let jit = compiler_with_defaults();

        let query_shape = QueryMetadata {
            triple_pattern_count: 5,
            join_count: 2,
            filter_count: 3,
            has_aggregation: true,
            has_optional: true,
            has_union: false,
            ..Default::default()
        };

        let score = jit.calculate_complexity(&query_shape);
        assert!(score > 0);
        assert!(score <= 1000);
    }

    /// The memory estimate for a query with patterns and joins is non-zero.
    #[test]
    fn test_memory_estimation() {
        let jit = compiler_with_defaults();

        let query_shape = QueryMetadata {
            triple_pattern_count: 3,
            join_count: 2,
            ..Default::default()
        };

        let estimated = jit.estimate_memory(&query_shape);
        assert!(estimated > 0);
    }

    /// A fresh cache is empty, and clearing an empty cache leaves it empty.
    #[test]
    fn test_cache_operations() {
        let jit = QueryJitCompiler::new(JitCompilerConfig {
            enable_caching: true,
            ..Default::default()
        })
        .unwrap();

        // Initially empty
        assert_eq!(jit.query_cache.len(), 0);

        // Clear should work on empty cache
        jit.clear_cache();
        assert_eq!(jit.query_cache.len(), 0);
    }

    /// Counters and the running average are updated on every recorded execution.
    #[test]
    fn test_execution_stats() {
        let mut tracker = ExecutionStats::default();

        // First sample: 50 results in 100 ms.
        tracker.record_execution(Duration::from_millis(100), 50);
        assert_eq!(tracker.execution_count, 1);
        assert_eq!(tracker.total_results, 50);
        assert_eq!(tracker.avg_results, 50.0);

        // Second sample: 30 results in 200 ms.
        tracker.record_execution(Duration::from_millis(200), 30);
        assert_eq!(tracker.execution_count, 2);
        assert_eq!(tracker.total_results, 80);
        assert_eq!(tracker.avg_results, 40.0);
    }

    /// Re-optimization is requested only after enough slow executions were recorded.
    #[test]
    fn test_should_reoptimize() {
        let mut tracker = ExecutionStats::default();

        // Not enough executions
        assert!(!tracker.should_reoptimize(10));

        // Execute many times with slow queries
        (0..15).for_each(|_| tracker.record_execution(Duration::from_millis(150), 10));

        assert!(tracker.should_reoptimize(10));
    }

    /// Every `PatternType` variant can be named; there are eight of them listed here.
    #[test]
    fn test_pattern_type_variants() {
        // Test that all pattern types are defined
        let all_patterns = [
            PatternType::AllVariables,
            PatternType::SubjectBound,
            PatternType::PredicateBound,
            PatternType::ObjectBound,
            PatternType::SubjectPredicateBound,
            PatternType::SubjectObjectBound,
            PatternType::PredicateObjectBound,
            PatternType::FullyBound,
        ];

        assert_eq!(all_patterns.len(), 8);
    }

    /// Every `JitJoinStrategy` variant can be named; there are four of them listed here.
    #[test]
    fn test_join_strategy_variants() {
        let all_strategies = [
            JitJoinStrategy::Hash,
            JitJoinStrategy::SortMerge,
            JitJoinStrategy::IndexNestedLoop,
            JitJoinStrategy::Bind,
        ];

        assert_eq!(all_strategies.len(), 4);
    }
}
1065
#[cfg(test)]
mod extended_tests {
    use super::*;
    use std::time::Duration;

    /// Fixture: a compiler built from `JitCompilerConfig::default()`.
    fn default_compiler() -> QueryJitCompiler {
        let config = JitCompilerConfig::default();
        QueryJitCompiler::new(config).unwrap()
    }

    // --- JitCompilerConfig tests ---

    /// The default configuration enables the compiler and caching and uses sane limits.
    #[test]
    fn test_default_config_has_reasonable_values() {
        let defaults = JitCompilerConfig::default();
        assert!(defaults.enabled, "Compiler should be enabled by default");
        assert!(
            defaults.enable_caching,
            "Caching should be enabled by default"
        );
        assert!(defaults.max_cache_size > 0, "Cache size should be positive");
        assert!(
            defaults.optimization_level <= 3,
            "Optimization level should be 0-3"
        );
        assert!(
            defaults.max_plan_complexity > 0,
            "Max plan complexity should be positive"
        );
    }

    /// Turning caching off must not prevent the compiler from being constructed.
    #[test]
    fn test_config_with_disabled_caching() {
        let outcome = QueryJitCompiler::new(JitCompilerConfig {
            enable_caching: false,
            ..Default::default()
        });
        assert!(
            outcome.is_ok(),
            "Compiler should initialize with caching disabled"
        );
    }

    /// A disabled compiler can still be constructed.
    #[test]
    fn test_config_with_disabled_compiler() {
        let outcome = QueryJitCompiler::new(JitCompilerConfig {
            enabled: false,
            ..Default::default()
        });
        assert!(
            outcome.is_ok(),
            "Compiler should initialize even when disabled"
        );
    }

    // --- ExecutionStats tests ---

    /// Default statistics start with zeroed counters and no min/max timings.
    #[test]
    fn test_execution_stats_initial_state() {
        let fresh = ExecutionStats::default();
        assert_eq!(fresh.execution_count, 0);
        assert_eq!(fresh.total_results, 0);
        assert!(fresh.min_execution_time.is_none());
        assert!(fresh.max_execution_time.is_none());
    }

    /// The fastest and slowest recorded executions are tracked independently of order.
    #[test]
    fn test_execution_stats_min_max_tracking() {
        let mut tracker = ExecutionStats::default();
        for (millis, results) in [(50, 10), (200, 20), (100, 15)] {
            tracker.record_execution(Duration::from_millis(millis), results);
        }

        assert_eq!(
            tracker.min_execution_time.unwrap(),
            Duration::from_millis(50)
        );
        assert_eq!(
            tracker.max_execution_time.unwrap(),
            Duration::from_millis(200)
        );
    }

    /// The average result count is the mean over all recorded executions.
    #[test]
    fn test_execution_stats_average_results() {
        let mut tracker = ExecutionStats::default();
        for results in [10, 20, 30] {
            tracker.record_execution(Duration::from_millis(10), results);
        }

        assert!(
            (tracker.avg_results - 20.0).abs() < 0.001,
            "Average results should be 20.0"
        );
    }

    /// Total execution time is the sum of all recorded durations.
    #[test]
    fn test_execution_stats_total_time_accumulates() {
        let mut tracker = ExecutionStats::default();
        for millis in [100, 200] {
            tracker.record_execution(Duration::from_millis(millis), 5);
        }

        assert_eq!(tracker.total_execution_time, Duration::from_millis(300));
    }

    /// Slow queries do not trigger re-optimization before the minimum execution count.
    #[test]
    fn test_should_reoptimize_below_min_executions() {
        let mut tracker = ExecutionStats::default();
        // Just 5 executions with slow queries - below threshold
        (0..5).for_each(|_| tracker.record_execution(Duration::from_millis(200), 10));
        assert!(
            !tracker.should_reoptimize(10),
            "Should not reoptimize below min_executions threshold"
        );
    }

    /// Plenty of executions alone are not enough: fast queries are left as they are.
    #[test]
    fn test_should_not_reoptimize_fast_queries() {
        let mut tracker = ExecutionStats::default();
        // Many executions but all very fast
        (0..20).for_each(|_| tracker.record_execution(Duration::from_millis(1), 10));
        assert!(
            !tracker.should_reoptimize(10),
            "Fast queries should not trigger reoptimization"
        );
    }

    // --- QueryMetadata tests ---

    /// Default metadata describes an empty query with no special features.
    #[test]
    fn test_query_metadata_default() {
        let empty = QueryMetadata::default();
        assert_eq!(empty.triple_pattern_count, 0);
        assert!(!empty.has_aggregation);
        assert!(!empty.has_optional);
        assert!(!empty.has_union);
    }

    /// Explicitly set fields are kept when the rest comes from `Default`.
    #[test]
    fn test_query_metadata_with_aggregation() {
        let aggregated = QueryMetadata {
            has_aggregation: true,
            join_count: 3,
            triple_pattern_count: 4,
            ..Default::default()
        };
        assert!(aggregated.has_aggregation);
        assert_eq!(aggregated.join_count, 3);
    }

    // --- CompilerStats tests ---

    /// A freshly built compiler reports all-zero statistics.
    #[test]
    fn test_compiler_stats_initial_values() {
        let snapshot = default_compiler().stats();
        assert_eq!(snapshot.total_compilations, 0);
        assert_eq!(snapshot.cache_hits, 0);
        assert_eq!(snapshot.cache_misses, 0);
        assert_eq!(snapshot.cache_evictions, 0);
    }

    // --- Cache management tests ---

    /// Clearing the cache leaves it with zero entries.
    #[test]
    fn test_clear_cache_makes_it_empty() {
        let jit = default_compiler();
        // Cache starts empty; clear should still work
        jit.clear_cache();
        assert_eq!(jit.query_cache.len(), 0);
    }

    // --- JoinStrategy and FilterType tests ---

    /// Compile-time check that every listed `JitJoinStrategy` variant exists.
    #[test]
    fn test_jit_join_strategy_all_variants() {
        let _: Vec<JitJoinStrategy> = Vec::from([
            JitJoinStrategy::Hash,
            JitJoinStrategy::SortMerge,
            JitJoinStrategy::IndexNestedLoop,
            JitJoinStrategy::Bind,
        ]);
    }

    /// Compile-time check that every listed `FilterType` variant exists.
    #[test]
    fn test_filter_type_all_variants() {
        let _: Vec<FilterType> = Vec::from([
            FilterType::Equality,
            FilterType::NumericComparison,
            FilterType::StringOperation,
            FilterType::Regex,
            FilterType::BooleanLogic,
            FilterType::Complex,
        ]);
    }

    // --- PatternType tests ---

    /// All eight `PatternType` variants can be listed side by side.
    #[test]
    fn test_pattern_type_all_8_variants_coverage() {
        let every_variant = [
            PatternType::AllVariables,
            PatternType::SubjectBound,
            PatternType::PredicateBound,
            PatternType::ObjectBound,
            PatternType::SubjectPredicateBound,
            PatternType::SubjectObjectBound,
            PatternType::PredicateObjectBound,
            PatternType::FullyBound,
        ];
        // All variants should be uniquely representable
        assert_eq!(every_variant.len(), 8);
    }
}