oxirs-fuseki 0.2.4

SPARQL 1.1/1.2 HTTP protocol server with Fuseki-compatible configuration
Documentation
//! Query optimizers for enhanced SPARQL performance
//!
//! This module contains various optimization engines that improve
//! SPARQL query execution performance.

use crate::error::FusekiResult;
use std::collections::HashMap;

/// Injection detection and prevention
#[derive(Debug, Clone)]
pub struct InjectionDetector {
    patterns: Vec<String>,
    whitelist: Vec<String>,
    pub enabled: bool,
}

impl Default for InjectionDetector {
    fn default() -> Self {
        Self::new()
    }
}

impl InjectionDetector {
    pub fn new() -> Self {
        Self {
            patterns: vec![
                "DROP".to_string(),
                "DELETE WHERE".to_string(),
                "INSERT DATA".to_string(),
                "CLEAR".to_string(),
                "LOAD".to_string(),
            ],
            whitelist: vec![
                "SELECT".to_string(),
                "CONSTRUCT".to_string(),
                "ASK".to_string(),
                "DESCRIBE".to_string(),
            ],
            enabled: true,
        }
    }

    /// Detect potential SPARQL injection attempts
    pub fn detect_injection(&self, query: &str) -> FusekiResult<bool> {
        let upper_query = query.to_uppercase();

        // Check for dangerous patterns
        for pattern in &self.patterns {
            if upper_query.contains(pattern) {
                return Ok(true);
            }
        }

        // Check if query contains only whitelisted operations
        let has_whitelist = self.whitelist.iter().any(|op| upper_query.contains(op));

        if !has_whitelist {
            return Ok(true); // Suspicious - no recognized query type
        }

        Ok(false)
    }

    /// Sanitize query by removing dangerous elements
    pub fn sanitize_query(&self, query: &str) -> FusekiResult<String> {
        let mut sanitized = query.to_string();

        // Remove dangerous patterns
        for pattern in &self.patterns {
            sanitized = sanitized.replace(pattern, "");
        }

        Ok(sanitized)
    }
}

/// Query complexity analyzer
#[derive(Debug, Clone)]
pub struct ComplexityAnalyzer {
    max_depth: usize,
    max_joins: usize,
    max_unions: usize,
}

impl Default for ComplexityAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

impl ComplexityAnalyzer {
    pub fn new() -> Self {
        Self {
            max_depth: 10,
            max_joins: 20,
            max_unions: 10,
        }
    }

    /// Analyze query complexity
    pub fn analyze_complexity(&self, query: &str) -> FusekiResult<QueryComplexity> {
        let depth = self.calculate_nesting_depth(query);
        let joins = self.count_joins(query);
        let unions = self.count_unions(query);

        Ok(QueryComplexity {
            nesting_depth: depth,
            join_count: joins,
            union_count: unions,
            is_complex: depth > self.max_depth
                || joins > self.max_joins
                || unions > self.max_unions,
        })
    }

    fn calculate_nesting_depth(&self, query: &str) -> usize {
        let mut max_depth: usize = 0;
        let mut current_depth: usize = 0;

        for ch in query.chars() {
            match ch {
                '{' => {
                    current_depth += 1;
                    max_depth = max_depth.max(current_depth);
                }
                '}' => {
                    current_depth = current_depth.saturating_sub(1);
                }
                _ => {}
            }
        }

        max_depth
    }

    fn count_joins(&self, query: &str) -> usize {
        let upper = query.to_uppercase();

        // Count explicit joins
        let explicit_joins = upper.matches("JOIN").count();

        // Count implicit joins (multiple triple patterns)
        let triple_patterns = upper.matches(" . ").count() + 1;

        explicit_joins + triple_patterns.saturating_sub(1_usize)
    }

    fn count_unions(&self, query: &str) -> usize {
        query.to_uppercase().matches("UNION").count()
    }
}

/// Query complexity metrics
#[derive(Debug, Clone)]
pub struct QueryComplexity {
    pub nesting_depth: usize,
    pub join_count: usize,
    pub union_count: usize,
    pub is_complex: bool,
}

/// Performance optimizer for query execution
#[derive(Debug, Clone)]
pub struct PerformanceOptimizer {
    cache: HashMap<String, OptimizedQuery>,
}

impl Default for PerformanceOptimizer {
    fn default() -> Self {
        Self::new()
    }
}

impl PerformanceOptimizer {
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Optimize query for better performance
    pub fn optimize(&mut self, query: &str) -> FusekiResult<String> {
        // Check cache
        if let Some(cached) = self.cache.get(query) {
            return Ok(cached.optimized_query.clone());
        }

        let mut optimized = query.to_string();

        // Apply various optimizations
        optimized = self.optimize_filter_placement(&optimized)?;
        optimized = self.optimize_join_order(&optimized)?;
        optimized = self.optimize_projection(&optimized)?;

        // Cache result
        self.cache.insert(
            query.to_string(),
            OptimizedQuery {
                original_query: query.to_string(),
                optimized_query: optimized.clone(),
                optimization_applied: vec![
                    "filter_placement".to_string(),
                    "join_order".to_string(),
                ],
            },
        );

        Ok(optimized)
    }

    /// Optimize FILTER placement (push down filters)
    fn optimize_filter_placement(&self, query: &str) -> FusekiResult<String> {
        // Simple filter pushdown - move FILTER clauses closer to relevant triple patterns
        // This is a simplified implementation
        Ok(query.to_string())
    }

    /// Optimize join order based on selectivity estimates
    fn optimize_join_order(&self, query: &str) -> FusekiResult<String> {
        // Simple join reordering - place more selective patterns first
        // This is a simplified implementation
        Ok(query.to_string())
    }

    /// Optimize projection (eliminate unnecessary variables)
    fn optimize_projection(&self, query: &str) -> FusekiResult<String> {
        // Remove unused variables from SELECT clause
        // This is a simplified implementation
        Ok(query.to_string())
    }
}

/// Optimized query representation
#[derive(Debug, Clone)]
pub struct OptimizedQuery {
    pub original_query: String,
    pub optimized_query: String,
    pub optimization_applied: Vec<String>,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_injection_detection() {
        let detector = InjectionDetector::new();

        // Safe query
        let safe_query = "SELECT ?s WHERE { ?s ?p ?o }";
        assert!(!detector.detect_injection(safe_query).unwrap());

        // Dangerous query
        let dangerous_query = "DROP GRAPH <http://example.org/graph>";
        assert!(detector.detect_injection(dangerous_query).unwrap());
    }

    #[test]
    fn test_complexity_analysis() {
        let analyzer = ComplexityAnalyzer::new();

        // Simple query
        let simple_query = "SELECT ?s WHERE { ?s ?p ?o }";
        let complexity = analyzer.analyze_complexity(simple_query).unwrap();
        assert!(!complexity.is_complex);

        // Complex query with nesting
        let complex_query = "SELECT ?s WHERE { ?s ?p { ?x ?y { ?z ?a ?b } } }";
        let complexity = analyzer.analyze_complexity(complex_query).unwrap();
        assert!(complexity.nesting_depth > 1);
    }

    #[test]
    fn test_performance_optimization() {
        let mut optimizer = PerformanceOptimizer::new();

        let query =
            "SELECT ?s ?p ?o WHERE { ?s ?p ?o . FILTER(?s = <http://example.org/resource>) }";
        let optimized = optimizer.optimize(query).unwrap();

        // Should return a result (even if not actually optimized in this simple implementation)
        assert!(!optimized.is_empty());

        // Test caching
        let cached_result = optimizer.optimize(query).unwrap();
        assert_eq!(optimized, cached_result);
    }
}