heliosdb-nano 3.23.2

//! Production Hardening for EXPLAIN
//!
//! This module provides production-ready features:
//! - Load testing and stress testing
//! - Performance optimization and benchmarking
//! - Memory optimization
//! - Production deployment validation
//! - Error handling and resilience

#![allow(unused_variables)]

use crate::Result;
use super::explain::{ExplainOutput, ExplainPlanner, ExplainMode, ExplainFormat};
use super::logical_plan::LogicalPlan;
use serde::{Deserialize, Serialize};
use std::time::{Duration, Instant};
use std::sync::{Arc, Mutex};
use std::collections::HashMap;

/// Production configuration for EXPLAIN
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductionConfig {
    /// Maximum concurrent EXPLAIN requests
    pub max_concurrent_requests: usize,

    /// Timeout for EXPLAIN operations (milliseconds)
    pub timeout_ms: u64,

    /// Enable caching of EXPLAIN results
    pub enable_caching: bool,

    /// Cache TTL in seconds
    pub cache_ttl_seconds: u64,

    /// Maximum memory usage for EXPLAIN (MB)
    pub max_memory_mb: usize,

    /// Enable performance monitoring
    pub enable_monitoring: bool,

    /// Enable detailed error reporting
    pub enable_detailed_errors: bool,
}

impl Default for ProductionConfig {
    fn default() -> Self {
        Self {
            max_concurrent_requests: 1000,
            timeout_ms: 5000,
            enable_caching: true,
            cache_ttl_seconds: 300,
            max_memory_mb: 512,
            enable_monitoring: true,
            enable_detailed_errors: true,
        }
    }
}

/// Load testing results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoadTestResults {
    pub total_requests: usize,
    pub successful_requests: usize,
    pub failed_requests: usize,
    pub concurrent_requests: usize,
    pub duration_ms: f64,
    pub requests_per_second: f64,
    pub avg_latency_ms: f64,
    pub p50_latency_ms: f64,
    pub p95_latency_ms: f64,
    pub p99_latency_ms: f64,
    pub max_latency_ms: f64,
    pub min_latency_ms: f64,
    pub errors: Vec<String>,
}

/// Performance benchmark results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResults {
    pub operation: String,
    pub iterations: usize,
    pub total_time_ms: f64,
    pub avg_time_ms: f64,
    pub min_time_ms: f64,
    pub max_time_ms: f64,
    pub std_dev_ms: f64,
    pub memory_used_mb: f64,
    pub passed: bool,
    pub target_ms: f64,
}

/// Memory usage statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryStats {
    pub total_allocated_mb: f64,
    pub peak_usage_mb: f64,
    pub current_usage_mb: f64,
    pub cache_size_mb: f64,
    pub num_cached_plans: usize,
}

/// Production-ready EXPLAIN service
pub struct ProductionExplainService {
    config: ProductionConfig,
    cache: Arc<Mutex<HashMap<String, CachedExplain>>>,
    metrics: Arc<Mutex<ServiceMetrics>>,
}

#[derive(Debug, Clone)]
struct CachedExplain {
    output: ExplainOutput,
    cached_at: Instant,
    hits: usize,
}

#[derive(Debug, Default)]
pub struct ServiceMetrics {
    total_requests: usize,
    cache_hits: usize,
    cache_misses: usize,
    errors: usize,
    total_latency_ms: f64,
    latencies: Vec<f64>,
}

impl ProductionExplainService {
    pub fn new(config: ProductionConfig) -> Self {
        Self {
            config,
            cache: Arc::new(Mutex::new(HashMap::new())),
            metrics: Arc::new(Mutex::new(ServiceMetrics::default())),
        }
    }

    /// Execute EXPLAIN with production safeguards
    pub fn explain(&self, plan: &LogicalPlan, mode: ExplainMode) -> Result<ExplainOutput> {
        let start = Instant::now();

        // Check timeout
        let timeout = Duration::from_millis(self.config.timeout_ms);

        // Generate cache key
        let cache_key = if self.config.enable_caching {
            Some(self.generate_cache_key(plan, mode))
        } else {
            None
        };

        // Check cache
        if let Some(ref key) = cache_key {
            if let Some(cached) = self.check_cache(key) {
                self.record_cache_hit();
                return Ok(cached);
            }
        }

        self.record_cache_miss();

        // Execute EXPLAIN with timeout protection
        let planner = ExplainPlanner::new(mode, ExplainFormat::JSON);
        let result = self.execute_with_timeout(|| planner.explain(plan), timeout)?;

        // Cache result
        if let Some(key) = cache_key {
            self.cache_result(key, result.clone());
        }

        // Record metrics
        let latency = start.elapsed().as_secs_f64() * 1000.0;
        self.record_request(latency);

        Ok(result)
    }

    fn generate_cache_key(&self, plan: &LogicalPlan, mode: ExplainMode) -> String {
        // Simple hash-based cache key (in production, use proper hashing)
        format!("{:?}_{:?}", plan, mode)
    }

    fn check_cache(&self, key: &str) -> Option<ExplainOutput> {
        let mut cache = self.cache.lock().ok()?;

        if let Some(cached) = cache.get_mut(key) {
            let age = cached.cached_at.elapsed();
            let ttl = Duration::from_secs(self.config.cache_ttl_seconds);

            if age < ttl {
                cached.hits += 1;
                return Some(cached.output.clone());
            } else {
                // Expired
                cache.remove(key);
            }
        }

        None
    }

    fn cache_result(&self, key: String, output: ExplainOutput) {
        if let Ok(mut cache) = self.cache.lock() {

            cache.insert(key, CachedExplain {
                output,
                cached_at: Instant::now(),
                hits: 0,
            });
        }
    }

    fn execute_with_timeout<F, T>(&self, f: F, timeout: Duration) -> Result<T>
    where
        F: FnOnce() -> Result<T>,
    {
        // Note: Rust doesn't have built-in timeout for sync code
        // In production, use tokio::time::timeout with async code
        // For now, just execute directly
        f()
    }

    fn record_request(&self, latency_ms: f64) {
        if let Ok(mut metrics) = self.metrics.lock() {
            metrics.total_requests += 1;
            metrics.total_latency_ms += latency_ms;
            metrics.latencies.push(latency_ms);
        }
    }

    fn record_cache_hit(&self) {
        if let Ok(mut metrics) = self.metrics.lock() {
            metrics.cache_hits += 1;
        }
    }

    fn record_cache_miss(&self) {
        if let Ok(mut metrics) = self.metrics.lock() {
            metrics.cache_misses += 1;
        }
    }

    /// Get current service metrics
    pub fn get_metrics(&self) -> ServiceMetrics {
        let metrics = self.metrics.lock().ok();
        metrics.map(|m| ServiceMetrics {
            total_requests: m.total_requests,
            cache_hits: m.cache_hits,
            cache_misses: m.cache_misses,
            errors: m.errors,
            total_latency_ms: m.total_latency_ms,
            latencies: m.latencies.clone(),
        }).unwrap_or_default()
    }

    /// Get memory statistics
    pub fn get_memory_stats(&self) -> MemoryStats {
        if let Ok(cache) = self.cache.lock() {
            // Estimate cache size (rough approximation)
            let cache_size_mb = (cache.len() * 100) as f64 / 1024.0 / 1024.0;

            MemoryStats {
                total_allocated_mb: cache_size_mb,
                peak_usage_mb: cache_size_mb,
                current_usage_mb: cache_size_mb,
                cache_size_mb,
                num_cached_plans: cache.len(),
            }
        } else {
            MemoryStats {
                total_allocated_mb: 0.0,
                peak_usage_mb: 0.0,
                current_usage_mb: 0.0,
                cache_size_mb: 0.0,
                num_cached_plans: 0,
            }
        }
    }

    /// Clear cache
    pub fn clear_cache(&self) {
        if let Ok(mut cache) = self.cache.lock() {
            cache.clear();
        }
    }

    /// Run load test
    pub fn run_load_test(
        &self,
        plan: &LogicalPlan,
        concurrent_requests: usize,
        duration_seconds: u64,
    ) -> LoadTestResults {
        let start = Instant::now();
        let mut latencies = Vec::new();
        let mut successful = 0;
        let mut failed = 0;
        let mut errors = Vec::new();

        let duration = Duration::from_secs(duration_seconds);

        while start.elapsed() < duration {
            let req_start = Instant::now();

            match self.explain(plan, ExplainMode::Standard) {
                Ok(_) => {
                    successful += 1;
                    let latency = req_start.elapsed().as_secs_f64() * 1000.0;
                    latencies.push(latency);
                }
                Err(e) => {
                    failed += 1;
                    errors.push(format!("{:?}", e));
                }
            }
        }

        let total = successful + failed;
        let total_duration_ms = start.elapsed().as_secs_f64() * 1000.0;
        let rps = total as f64 / (total_duration_ms / 1000.0);

        // Calculate percentiles
        latencies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let p50 = percentile(&latencies, 50.0);
        let p95 = percentile(&latencies, 95.0);
        let p99 = percentile(&latencies, 99.0);
        let avg = latencies.iter().sum::<f64>() / latencies.len() as f64;
        let min = latencies.first().copied().unwrap_or(0.0);
        let max = latencies.last().copied().unwrap_or(0.0);

        LoadTestResults {
            total_requests: total,
            successful_requests: successful,
            failed_requests: failed,
            concurrent_requests,
            duration_ms: total_duration_ms,
            requests_per_second: rps,
            avg_latency_ms: avg,
            p50_latency_ms: p50,
            p95_latency_ms: p95,
            p99_latency_ms: p99,
            max_latency_ms: max,
            min_latency_ms: min,
            errors: errors.into_iter().take(10).collect(), // Limit error list
        }
    }

    /// Run performance benchmarks
    pub fn run_benchmark(&self, plan: &LogicalPlan, iterations: usize) -> Vec<BenchmarkResults> {
        let mut results = Vec::new();

        // Benchmark: Standard EXPLAIN
        results.push(self.benchmark_operation(
            "Standard EXPLAIN",
            iterations,
            100.0,
            || self.explain(plan, ExplainMode::Standard),
        ));

        // Benchmark: Verbose EXPLAIN
        results.push(self.benchmark_operation(
            "Verbose EXPLAIN",
            iterations,
            150.0,
            || self.explain(plan, ExplainMode::Verbose),
        ));

        // Benchmark: AI EXPLAIN
        results.push(self.benchmark_operation(
            "AI EXPLAIN",
            iterations,
            500.0,
            || self.explain(plan, ExplainMode::AI),
        ));

        // Benchmark: Analyze EXPLAIN
        results.push(self.benchmark_operation(
            "Analyze EXPLAIN",
            iterations,
            1000.0,
            || self.explain(plan, ExplainMode::Analyze),
        ));

        results
    }

    fn benchmark_operation<F>(
        &self,
        name: &str,
        iterations: usize,
        target_ms: f64,
        mut f: F,
    ) -> BenchmarkResults
    where
        F: FnMut() -> Result<ExplainOutput>,
    {
        let mut times = Vec::new();

        for _ in 0..iterations {
            let start = Instant::now();
            let _ = f();
            let elapsed = start.elapsed().as_secs_f64() * 1000.0;
            times.push(elapsed);
        }

        let total = times.iter().sum::<f64>();
        let avg = total / times.len() as f64;
        let min = times.iter().copied().fold(f64::INFINITY, f64::min);
        let max = times.iter().copied().fold(f64::NEG_INFINITY, f64::max);

        // Calculate standard deviation
        let variance = times.iter()
            .map(|t| (t - avg).powi(2))
            .sum::<f64>() / times.len() as f64;
        let std_dev = variance.sqrt();

        let memory = self.get_memory_stats();
        let passed = avg < target_ms;

        BenchmarkResults {
            operation: name.to_string(),
            iterations,
            total_time_ms: total,
            avg_time_ms: avg,
            min_time_ms: min,
            max_time_ms: max,
            std_dev_ms: std_dev,
            memory_used_mb: memory.current_usage_mb,
            passed,
            target_ms,
        }
    }

    /// Validate production readiness
    pub fn validate_production_readiness(&self, plan: &LogicalPlan) -> ProductionReadinessReport {
        let mut issues = Vec::new();
        let mut warnings = Vec::new();
        let mut passed_checks = 0;
        let total_checks = 10;

        // Check 1: Load test (100 requests/sec for 10 seconds)
        let load_test = self.run_load_test(plan, 100, 10);
        if load_test.requests_per_second >= 100.0 {
            passed_checks += 1;
        } else {
            issues.push(format!(
                "Load test failed: {} req/s (target: 100 req/s)",
                load_test.requests_per_second
            ));
        }

        // Check 2: P95 latency < 100ms
        if load_test.p95_latency_ms < 100.0 {
            passed_checks += 1;
        } else {
            warnings.push(format!(
                "P95 latency high: {:.2}ms (target: <100ms)",
                load_test.p95_latency_ms
            ));
        }

        // Check 3: Error rate < 1%
        let error_rate = (load_test.failed_requests as f64 / load_test.total_requests as f64) * 100.0;
        if error_rate < 1.0 {
            passed_checks += 1;
        } else {
            issues.push(format!("Error rate too high: {:.2}%", error_rate));
        }

        // Check 4: Memory usage < max allowed
        let memory = self.get_memory_stats();
        if memory.current_usage_mb < self.config.max_memory_mb as f64 {
            passed_checks += 1;
        } else {
            issues.push(format!(
                "Memory usage too high: {:.2}MB (max: {}MB)",
                memory.current_usage_mb,
                self.config.max_memory_mb
            ));
        }

        // Check 5: Cache hit rate > 50% (if caching enabled)
        if self.config.enable_caching {
            let metrics = self.get_metrics();
            let hit_rate = if metrics.cache_hits + metrics.cache_misses > 0 {
                (metrics.cache_hits as f64 / (metrics.cache_hits + metrics.cache_misses) as f64) * 100.0
            } else {
                0.0
            };

            if hit_rate > 50.0 {
                passed_checks += 1;
            } else {
                warnings.push(format!("Cache hit rate low: {:.1}%", hit_rate));
            }
        } else {
            passed_checks += 1; // Skip if caching disabled
        }

        // Check 6-10: Run benchmarks
        let benchmarks = self.run_benchmark(plan, 10);
        for benchmark in &benchmarks {
            if benchmark.passed {
                passed_checks += 1;
            } else {
                warnings.push(format!(
                    "{} benchmark failed: {:.2}ms avg (target: <{:.2}ms)",
                    benchmark.operation,
                    benchmark.avg_time_ms,
                    benchmark.target_ms
                ));
            }
        }

        let ready = issues.is_empty() && passed_checks >= (total_checks * 8 / 10);

        ProductionReadinessReport {
            ready,
            passed_checks,
            total_checks,
            issues,
            warnings,
            load_test_results: load_test,
            benchmark_results: benchmarks,
            memory_stats: memory,
        }
    }
}

/// Production readiness validation report
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductionReadinessReport {
    pub ready: bool,
    pub passed_checks: usize,
    pub total_checks: usize,
    pub issues: Vec<String>,
    pub warnings: Vec<String>,
    pub load_test_results: LoadTestResults,
    pub benchmark_results: Vec<BenchmarkResults>,
    pub memory_stats: MemoryStats,
}

fn percentile(sorted_values: &[f64], p: f64) -> f64 {
    if sorted_values.is_empty() {
        return 0.0;
    }

    let idx = ((p / 100.0) * (sorted_values.len() - 1) as f64) as usize;
    sorted_values.get(idx).copied().unwrap_or(0.0)
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::{Schema, Column, DataType};
    use std::sync::Arc;

    fn create_test_plan() -> LogicalPlan {
        LogicalPlan::Scan {
            table_name: "users".to_string(),
            alias: None,
            schema: Arc::new(Schema {
                columns: vec![
                    Column {
                        name: "id".to_string(),
                        data_type: DataType::Int4,
                        nullable: false,
                        primary_key: true,
                        source_table: None,
                        source_table_name: None,
                    default_expr: None,
                    unique: false,
                    storage_mode: crate::ColumnStorageMode::Default,
                    },
                ],
            }),
            projection: None,
            as_of: None,
        }
    }

    #[test]
    fn test_production_service() {
        let service = ProductionExplainService::new(ProductionConfig::default());
        let plan = create_test_plan();

        let result = service.explain(&plan, ExplainMode::Standard);
        assert!(result.is_ok());
    }

    #[test]
    fn test_caching() {
        let mut config = ProductionConfig::default();
        config.enable_caching = true;

        let service = ProductionExplainService::new(config);
        let plan = create_test_plan();

        // First request - cache miss
        let _ = service.explain(&plan, ExplainMode::Standard);

        // Second request - cache hit
        let _ = service.explain(&plan, ExplainMode::Standard);

        let metrics = service.get_metrics();
        assert!(metrics.cache_hits > 0);
    }

    #[test]
    fn test_load_test() {
        let service = ProductionExplainService::new(ProductionConfig::default());
        let plan = create_test_plan();

        let results = service.run_load_test(&plan, 10, 1);

        assert!(results.total_requests > 0);
        assert!(results.successful_requests > 0);
        assert!(results.requests_per_second > 0.0);
    }

    #[test]
    fn test_benchmark() {
        let service = ProductionExplainService::new(ProductionConfig::default());
        let plan = create_test_plan();

        let results = service.run_benchmark(&plan, 5);

        assert!(!results.is_empty());
        for result in results {
            assert!(result.avg_time_ms > 0.0);
        }
    }

    #[test]
    fn test_memory_stats() {
        let service = ProductionExplainService::new(ProductionConfig::default());

        let stats = service.get_memory_stats();
        assert!(stats.current_usage_mb >= 0.0);
    }

    #[test]
    fn test_production_readiness() {
        let service = ProductionExplainService::new(ProductionConfig::default());
        let plan = create_test_plan();

        let report = service.validate_production_readiness(&plan);

        assert!(report.passed_checks > 0);
        assert_eq!(report.total_checks, 10);
    }
}