// sarif_rust 0.3.0

//! Performance optimizations and monitoring for SARIF processing
//!
//! This module provides performance optimizations, memory management utilities,
//! and benchmarking tools for SARIF operations.

use crate::parser::{SarifError, SarifResult as ParseResult};
use crate::types::{Result as SarifResult, SarifLog};
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{Duration, Instant};

/// Performance monitoring and optimization utilities
///
/// Collects timing samples, counters and memory figures for named operations.
/// All fields are public, so callers may inspect or modify them directly.
#[derive(Debug, Clone)]
pub struct PerformanceMonitor {
    /// Operation timing records: every recorded duration, keyed by operation name
    pub timings: HashMap<String, Vec<Duration>>,

    /// Memory usage statistics
    pub memory_stats: MemoryStats,

    /// Operation counters, keyed by counter name
    pub counters: HashMap<String, usize>,

    /// Configuration for performance optimizations; its `enable_timing` and
    /// `track_memory` flags decide whether timings and memory figures are recorded
    pub config: PerformanceConfig,
}

/// Memory usage statistics
///
/// The values are whatever callers report through
/// `PerformanceMonitor::update_memory_stats`; nothing in this module measures
/// memory itself.
#[derive(Debug, Clone, Default)]
pub struct MemoryStats {
    /// Peak memory usage during operations (largest value reported so far, in MB)
    pub peak_memory_mb: f64,

    /// Current estimated memory usage (most recently reported value, in MB)
    pub current_memory_mb: f64,

    /// Number of allocations tracked; incremented once per recorded memory update
    pub allocation_count: usize,

    /// Memory usage by operation type (latest reported value per operation name)
    pub memory_by_operation: HashMap<String, f64>,
}

/// Configuration for performance optimizations
///
/// Only `track_memory` and `enable_timing` are read by the code in this module;
/// the remaining fields are carried as settings for other consumers.
#[derive(Debug, Clone)]
pub struct PerformanceConfig {
    /// Enable memory usage tracking (memory updates are ignored when `false`)
    pub track_memory: bool,

    /// Enable detailed timing (timing samples are discarded when `false`)
    pub enable_timing: bool,

    /// Maximum memory usage before triggering optimizations (MB)
    pub memory_threshold_mb: f64,

    /// Batch size for processing operations
    pub batch_size: usize,

    /// Enable parallel processing where possible
    pub enable_parallel: bool,

    /// Cache size limits
    pub cache_config: CacheConfig,
}

/// Cache configuration settings
///
/// NOTE(review): none of these limits is read by the caches defined in this
/// module; presumably they are enforced elsewhere — confirm.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Maximum number of parsed SARIF logs to cache
    pub max_logs: usize,

    /// Maximum number of indexed results to cache
    pub max_indexed_results: usize,

    /// Maximum number of query results to cache
    pub max_query_results: usize,

    /// Cache entry TTL in seconds
    pub ttl_seconds: u64,
}

/// Memory pool for efficient allocation of frequently used objects
///
/// Objects handed back to the pool are kept (up to `max_size`) and handed out
/// again before the factory is asked to build a new one.
pub struct MemoryPool<T> {
    /// Pool of reusable objects (used as a stack: last returned, first borrowed)
    pool: Vec<T>,

    /// Factory function for creating new objects when the pool is empty
    factory: Box<dyn Fn() -> T + Send + Sync>,

    /// Maximum pool size; objects returned beyond this limit are dropped
    max_size: usize,

    /// Current pool usage statistics
    stats: PoolStats,
}

/// Statistics for memory pool usage
///
/// The counters are atomics, so the type derives `Default` and `Debug` but not
/// `Clone`; `MemoryPool::get_stats` builds a fresh snapshot instead.
#[derive(Debug, Default)]
pub struct PoolStats {
    /// Number of objects borrowed from pool (includes freshly created ones)
    pub borrowed: AtomicUsize,

    /// Number of objects returned to pool (counted even if the pool was full)
    pub returned: AtomicUsize,

    /// Number of new objects created by the factory
    pub created: AtomicUsize,

    /// Peak pool size (largest number of idle objects held at once)
    pub peak_size: AtomicUsize,
}

/// Optimized SARIF processor with performance enhancements
///
/// Bundles the monitoring, pooling, interning and caching helpers of this
/// module. No constructor or method is defined for it in this file, hence the
/// `dead_code` allowance.
///
/// NOTE(review): `result_pool` is shared through `Arc`, but `MemoryPool::borrow`
/// and `MemoryPool::return_object` take `&mut self`, so the pool cannot be used
/// through this field without interior mutability — confirm intended design.
#[allow(dead_code)]
pub struct OptimizedSarifProcessor {
    /// Performance monitor
    monitor: PerformanceMonitor,

    /// Memory pools for common objects
    result_pool: Arc<MemoryPool<SarifResult>>,

    /// String interning for reducing memory usage
    string_interner: StringInterner,

    /// Cached computations
    computation_cache: ComputationCache,
}

/// String interner for reducing memory usage of duplicate strings
///
/// Each distinct string is stored once and handed out as a shared `Arc<String>`.
#[derive(Debug)]
pub struct StringInterner {
    /// Interned strings with reference counting: the shared value plus the
    /// number of `intern` calls not yet balanced by `release`
    strings: HashMap<String, (Arc<String>, usize)>,

    /// Statistics
    stats: InternerStats,
}

/// Statistics for string interner
#[derive(Debug, Clone, Default)]
pub struct InternerStats {
    /// Total strings interned (number of times a new entry was created)
    pub total_interned: usize,

    /// Number of cache hits (requests answered with an existing entry)
    pub cache_hits: usize,

    /// Memory saved (estimated in bytes, based on string lengths)
    pub memory_saved_bytes: usize,
}

/// Cache for expensive computations
///
/// The maps are unbounded: entries stay until `clear` is called.
#[derive(Debug)]
pub struct ComputationCache {
    /// Cached fingerprints for results, keyed by a caller-chosen string
    fingerprint_cache: HashMap<String, String>,

    /// Cached file path resolutions, keyed by a caller-chosen string
    path_cache: HashMap<String, String>,

    /// Cached rule lookups (`None` caches a failed lookup); no accessor for
    /// this map is defined in this file
    rule_cache: HashMap<String, Option<Arc<crate::types::ReportingDescriptor>>>,

    /// Cache statistics
    stats: CacheStats,
}

/// Cache usage statistics
///
/// Keys are cache type names; `ComputationCache` uses `"fingerprint"` and `"path"`.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Cache hits by type
    pub hits_by_type: HashMap<String, usize>,

    /// Cache misses by type
    pub misses_by_type: HashMap<String, usize>,

    /// Cache evictions by type (never written by the code in this module)
    pub evictions_by_type: HashMap<String, usize>,
}

/// Benchmark suite for SARIF operations
///
/// Runs each configured operation against each registered dataset and keeps
/// the results of the most recent successful run.
pub struct SarifBenchmark {
    /// Test data sets of varying sizes
    pub test_datasets: Vec<BenchmarkDataset>,

    /// Benchmark results (one entry per operation/dataset pair of the last run)
    pub results: Vec<BenchmarkResult>,

    /// Configuration for benchmarks
    pub config: BenchmarkConfig,
}

/// Dataset for benchmarking
#[derive(Debug, Clone)]
pub struct BenchmarkDataset {
    /// Dataset name (copied into every `BenchmarkResult` produced from it)
    pub name: String,

    /// SARIF log for testing
    pub log: SarifLog,

    /// Expected characteristics; the counts and size here feed the throughput
    /// figures of the benchmark results
    pub characteristics: DatasetCharacteristics,
}

/// Characteristics of a benchmark dataset
///
/// These are descriptive figures supplied by whoever builds the dataset; they
/// are not re-derived from the log when a benchmark runs.
#[derive(Debug, Clone)]
pub struct DatasetCharacteristics {
    /// Number of runs
    pub run_count: usize,

    /// Total number of results
    pub result_count: usize,

    /// Number of unique files
    pub file_count: usize,

    /// Number of unique rules
    pub rule_count: usize,

    /// Average results per file
    pub avg_results_per_file: f64,

    /// Estimated size in MB
    pub size_mb: f64,
}

/// Result of a benchmark operation
///
/// One value is produced per operation/dataset pair.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Operation name
    pub operation: String,

    /// Dataset used (the dataset's name)
    pub dataset: String,

    /// Execution time (average over the measured iterations)
    pub duration: Duration,

    /// Memory usage during operation (average per iteration, in MB)
    pub memory_usage_mb: f64,

    /// Throughput metrics
    pub throughput: ThroughputMetrics,

    /// Additional metrics (left empty by the benchmark runner in this module)
    pub custom_metrics: HashMap<String, f64>,
}

/// Throughput metrics for benchmarks
///
/// Each figure is a dataset characteristic divided by the average duration in
/// seconds.
#[derive(Debug, Clone)]
pub struct ThroughputMetrics {
    /// Results processed per second
    pub results_per_second: f64,

    /// Files processed per second
    pub files_per_second: f64,

    /// MB processed per second
    pub mb_per_second: f64,
}

/// Configuration for benchmark runs
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Number of iterations per benchmark (measured runs that are averaged)
    pub iterations: usize,

    /// Warmup iterations before measurement (executed but not timed)
    pub warmup_iterations: usize,

    /// Enable memory profiling during benchmarks
    /// (not consulted by the benchmark runner in this module)
    pub profile_memory: bool,

    /// Operations to benchmark; the runner in this module understands
    /// `"parse"`, `"index"`, `"query"` and `"conversion"`
    pub operations: Vec<String>,
}

impl PerformanceMonitor {
    /// Create a new performance monitor with no recorded data.
    pub fn new(config: PerformanceConfig) -> Self {
        Self {
            timings: HashMap::new(),
            memory_stats: MemoryStats::default(),
            counters: HashMap::new(),
            config,
        }
    }

    /// Start timing an operation.
    ///
    /// The returned [`TimingHandle`] records the elapsed time into this monitor
    /// when it is dropped.
    ///
    /// NOTE(review): the handle keeps a raw pointer to `self` that is not tied
    /// to this borrow by a lifetime. Callers must drop the handle before the
    /// monitor is moved, dropped, or accessed again; nothing enforces this.
    pub fn start_timing(&mut self, operation: &str) -> TimingHandle {
        TimingHandle {
            operation: operation.to_string(),
            start_time: Instant::now(),
            monitor: self as *mut PerformanceMonitor,
        }
    }

    /// Record a timing measurement.
    ///
    /// The sample is discarded when `config.enable_timing` is `false`.
    pub fn record_timing(&mut self, operation: &str, duration: Duration) {
        if self.config.enable_timing {
            self.timings
                .entry(operation.to_string())
                .or_default()
                .push(duration);
        }
    }

    /// Increment a counter, creating it at zero first if necessary.
    pub fn increment_counter(&mut self, counter: &str) {
        *self.counters.entry(counter.to_string()).or_insert(0) += 1;
    }

    /// Update memory statistics with the latest figure for `operation`.
    ///
    /// Does nothing when `config.track_memory` is `false`.
    pub fn update_memory_stats(&mut self, operation: &str, memory_mb: f64) {
        if self.config.track_memory {
            self.memory_stats.current_memory_mb = memory_mb;
            if memory_mb > self.memory_stats.peak_memory_mb {
                self.memory_stats.peak_memory_mb = memory_mb;
            }
            self.memory_stats
                .memory_by_operation
                .insert(operation.to_string(), memory_mb);
            self.memory_stats.allocation_count += 1;
        }
    }

    /// Get average timing for an operation.
    ///
    /// Returns `None` when the operation is unknown or has no samples.
    pub fn get_average_timing(&self, operation: &str) -> Option<Duration> {
        self.timings.get(operation).and_then(|times| {
            let total: Duration = times.iter().sum();
            // `checked_div` yields `None` for a zero divisor, so an empty
            // sample list (possible because `timings` is public) no longer panics.
            total.checked_div(times.len() as u32)
        })
    }

    /// Get percentile timing for an operation.
    ///
    /// `percentile` is expressed on a 0–100 scale; values outside that range
    /// resolve to the smallest or largest sample. Returns `None` when the
    /// operation is unknown or has no samples.
    pub fn get_percentile_timing(&self, operation: &str, percentile: f64) -> Option<Duration> {
        self.timings.get(operation).and_then(|times| {
            let mut sorted_times = times.clone();
            sorted_times.sort_unstable();
            Self::percentile_from_sorted(&sorted_times, percentile)
        })
    }

    /// Generate performance report.
    ///
    /// Operations without any samples are left out of `operation_stats`.
    pub fn generate_report(&self) -> PerformanceReport {
        let mut operation_stats = HashMap::new();

        for (operation, times) in &self.timings {
            // Sort once and derive min, max and every percentile from the same copy.
            let mut sorted = times.clone();
            sorted.sort_unstable();

            let (min, max) = match (sorted.first(), sorted.last()) {
                (Some(&min), Some(&max)) => (min, max),
                _ => continue, // no samples for this operation
            };

            let total: Duration = sorted.iter().sum();
            // `sorted` is non-empty here, so the divisor is at least one.
            let avg = total / sorted.len() as u32;

            operation_stats.insert(
                operation.clone(),
                OperationStats {
                    count: sorted.len(),
                    total_duration: total,
                    average_duration: avg,
                    min_duration: min,
                    max_duration: max,
                    p50: Self::percentile_from_sorted(&sorted, 50.0).unwrap_or(avg),
                    p95: Self::percentile_from_sorted(&sorted, 95.0).unwrap_or(max),
                    p99: Self::percentile_from_sorted(&sorted, 99.0).unwrap_or(max),
                },
            );
        }

        PerformanceReport {
            operation_stats,
            memory_stats: self.memory_stats.clone(),
            counters: self.counters.clone(),
            total_operations: self.counters.values().sum(),
        }
    }

    /// Pick the sample for `percentile` (0–100 scale) from an ascending slice.
    ///
    /// The index is `percentile / 100 * len`, truncated and clamped to the last
    /// element. Returns `None` for an empty slice.
    fn percentile_from_sorted(sorted: &[Duration], percentile: f64) -> Option<Duration> {
        let last = sorted.len().checked_sub(1)?;
        let index = ((percentile / 100.0) * sorted.len() as f64) as usize;
        sorted.get(index.min(last)).copied()
    }
}

/// Handle for timing operations
///
/// Created by `PerformanceMonitor::start_timing`; when the handle is dropped,
/// the time elapsed since its creation is recorded into the monitor.
///
/// NOTE(review): the monitor is referenced through a raw pointer with no
/// lifetime, so the compiler cannot stop a handle from outliving (or aliasing)
/// its monitor. Keep handles strictly scoped inside the monitor's lifetime.
pub struct TimingHandle {
    // Name under which the elapsed time is recorded
    operation: String,
    // Moment the handle was created
    start_time: Instant,
    // Monitor that receives the sample on drop; may be null, in which case
    // nothing is recorded
    monitor: *mut PerformanceMonitor,
}

impl Drop for TimingHandle {
    /// Records the time elapsed since the handle was created into the monitor
    /// it points to. A null monitor pointer is silently ignored.
    fn drop(&mut self) {
        let elapsed = self.start_time.elapsed();

        // SAFETY: the pointer was taken from a live `&mut PerformanceMonitor`
        // when the handle was created. Soundness relies on the caller dropping
        // this handle while that monitor is still alive, un-moved and not
        // otherwise borrowed; the type system does not enforce this.
        let target = unsafe { self.monitor.as_mut() };

        if let Some(monitor) = target {
            monitor.record_timing(&self.operation, elapsed);
        }
    }
}

/// Performance report
///
/// A snapshot produced by `PerformanceMonitor::generate_report`; it owns
/// copies of the data and does not change when the monitor changes.
#[derive(Debug, Clone)]
pub struct PerformanceReport {
    /// Statistics by operation (only operations with at least one sample)
    pub operation_stats: HashMap<String, OperationStats>,

    /// Memory usage statistics
    pub memory_stats: MemoryStats,

    /// Counter values
    pub counters: HashMap<String, usize>,

    /// Total number of operations (sum of all counter values)
    pub total_operations: usize,
}

/// Statistics for a specific operation
///
/// Derived from the timing samples recorded for one operation name.
#[derive(Debug, Clone)]
pub struct OperationStats {
    /// Number of times operation was performed (number of samples)
    pub count: usize,

    /// Total time spent on operation (sum of all samples)
    pub total_duration: Duration,

    /// Average duration per operation
    pub average_duration: Duration,

    /// Minimum duration observed
    pub min_duration: Duration,

    /// Maximum duration observed
    pub max_duration: Duration,

    /// 50th percentile (median)
    pub p50: Duration,

    /// 95th percentile
    pub p95: Duration,

    /// 99th percentile
    pub p99: Duration,
}

impl<T> MemoryPool<T> {
    /// Build an empty pool that keeps at most `max_size` idle objects and
    /// calls `factory` whenever it has nothing to hand out.
    pub fn new<F>(factory: F, max_size: usize) -> Self
    where
        F: Fn() -> T + Send + Sync + 'static,
    {
        let factory: Box<dyn Fn() -> T + Send + Sync> = Box::new(factory);
        let pool = Vec::with_capacity(max_size);

        Self {
            pool,
            factory,
            max_size,
            stats: PoolStats::default(),
        }
    }

    /// Take an object out of the pool, creating a fresh one when none is idle.
    pub fn borrow(&mut self) -> T {
        self.stats.borrowed.fetch_add(1, Ordering::Relaxed);

        match self.pool.pop() {
            Some(recycled) => recycled,
            None => {
                self.stats.created.fetch_add(1, Ordering::Relaxed);
                (self.factory)()
            }
        }
    }

    /// Hand an object back; it is dropped instead of stored when the pool
    /// already holds `max_size` objects.
    pub fn return_object(&mut self, obj: T) {
        self.stats.returned.fetch_add(1, Ordering::Relaxed);

        if self.pool.len() >= self.max_size {
            return;
        }

        self.pool.push(obj);
        // Raise the recorded peak if the pool just grew past it.
        self.stats
            .peak_size
            .fetch_max(self.pool.len(), Ordering::Relaxed);
    }

    /// Return a snapshot of the usage counters as they are right now.
    pub fn get_stats(&self) -> PoolStats {
        let snapshot = |counter: &AtomicUsize| AtomicUsize::new(counter.load(Ordering::Relaxed));

        PoolStats {
            borrowed: snapshot(&self.stats.borrowed),
            returned: snapshot(&self.stats.returned),
            created: snapshot(&self.stats.created),
            peak_size: snapshot(&self.stats.peak_size),
        }
    }
}

impl Default for StringInterner {
    fn default() -> Self {
        Self::new()
    }
}

impl StringInterner {
    /// Create a new, empty string interner.
    pub fn new() -> Self {
        Self {
            strings: HashMap::new(),
            stats: InternerStats::default(),
        }
    }

    /// Intern a string, returning an `Arc` to the canonical version.
    ///
    /// The first request for a given string stores it; later requests return a
    /// clone of the same `Arc` and count as cache hits. Each call increases the
    /// entry's internal reference count, which [`release`](Self::release)
    /// decreases again.
    pub fn intern(&mut self, s: &str) -> Arc<String> {
        if let Some((shared, ref_count)) = self.strings.get_mut(s) {
            *ref_count += 1;
            self.stats.cache_hits += 1;
            // Memory is saved on a hit: the caller reuses the stored copy
            // instead of allocating another `s.len()` bytes.
            self.stats.memory_saved_bytes += s.len();
            return Arc::clone(shared);
        }

        // First occurrence: this allocation is the canonical copy, so nothing
        // has been saved yet.
        let shared = Arc::new(s.to_string());
        self.strings
            .insert(s.to_string(), (Arc::clone(&shared), 1));
        self.stats.total_interned += 1;

        shared
    }

    /// Release a reference to an interned string.
    ///
    /// Decrements the entry's internal reference count and removes the entry
    /// once it reaches zero. Unknown strings are ignored. This count is
    /// independent of the `Arc` strong count: `Arc`s already handed out stay
    /// valid after the entry is removed.
    pub fn release(&mut self, s: &str) {
        if let Some((_, ref_count)) = self.strings.get_mut(s) {
            // Entries are removed as soon as they hit zero, so a stored count
            // is always at least one and this cannot underflow.
            *ref_count -= 1;
            if *ref_count == 0 {
                self.strings.remove(s);
            }
        }
    }

    /// Get interner statistics.
    pub fn get_stats(&self) -> &InternerStats {
        &self.stats
    }
}

impl Default for ComputationCache {
    fn default() -> Self {
        Self::new()
    }
}

impl ComputationCache {
    /// Build a cache with every map empty and no statistics recorded.
    pub fn new() -> Self {
        let stats = CacheStats::default();

        Self {
            fingerprint_cache: HashMap::new(),
            path_cache: HashMap::new(),
            rule_cache: HashMap::new(),
            stats,
        }
    }

    /// Return the fingerprint stored under `key`, running `compute_fn` and
    /// storing its output only when no entry exists yet.
    ///
    /// Hits and misses are tallied under the `"fingerprint"` cache type.
    pub fn get_fingerprint<F>(&mut self, key: &str, compute_fn: F) -> String
    where
        F: FnOnce() -> String,
    {
        match self.fingerprint_cache.get(key) {
            Some(cached) => {
                let cached = cached.clone();
                self.record_hit("fingerprint");
                cached
            }
            None => {
                self.record_miss("fingerprint");
                let computed = compute_fn();
                self.fingerprint_cache
                    .insert(key.to_string(), computed.clone());
                computed
            }
        }
    }

    /// Return the path stored under `key`, running `resolve_fn` and storing
    /// its output only when no entry exists yet.
    ///
    /// Hits and misses are tallied under the `"path"` cache type.
    pub fn get_path<F>(&mut self, key: &str, resolve_fn: F) -> String
    where
        F: FnOnce() -> String,
    {
        match self.path_cache.get(key) {
            Some(cached) => {
                let cached = cached.clone();
                self.record_hit("path");
                cached
            }
            None => {
                self.record_miss("path");
                let resolved = resolve_fn();
                self.path_cache.insert(key.to_string(), resolved.clone());
                resolved
            }
        }
    }

    /// Empty all three caches. The hit/miss statistics are kept.
    pub fn clear(&mut self) {
        self.fingerprint_cache.clear();
        self.path_cache.clear();
        self.rule_cache.clear();
    }

    /// Borrow the accumulated hit/miss statistics.
    pub fn get_stats(&self) -> &CacheStats {
        &self.stats
    }

    // Add one to the hit tally of `cache_type`.
    fn record_hit(&mut self, cache_type: &str) {
        let tally = self
            .stats
            .hits_by_type
            .entry(cache_type.to_string())
            .or_default();
        *tally += 1;
    }

    // Add one to the miss tally of `cache_type`.
    fn record_miss(&mut self, cache_type: &str) {
        let tally = self
            .stats
            .misses_by_type
            .entry(cache_type.to_string())
            .or_default();
        *tally += 1;
    }
}

impl SarifBenchmark {
    /// Create a new benchmark suite with no datasets and no recorded results.
    pub fn new(config: BenchmarkConfig) -> Self {
        Self {
            test_datasets: Vec::new(),
            results: Vec::new(),
            config,
        }
    }

    /// Add a test dataset.
    pub fn add_dataset(&mut self, dataset: BenchmarkDataset) {
        self.test_datasets.push(dataset);
    }

    /// Run all benchmarks: every configured operation against every dataset.
    ///
    /// On success the new results replace `self.results` and a copy is returned.
    ///
    /// # Errors
    ///
    /// Returns the first error raised by a benchmark (unknown operation, zero
    /// iterations, or a failure of the benchmarked operation). In that case
    /// `self.results` is left unchanged.
    pub fn run_benchmarks(&mut self) -> ParseResult<Vec<BenchmarkResult>> {
        let mut all_results = Vec::new();

        for dataset in &self.test_datasets {
            for operation in &self.config.operations {
                let result = self.run_single_benchmark(operation, dataset)?;
                all_results.push(result);
            }
        }

        self.results = all_results.clone();
        Ok(all_results)
    }

    /// Run a single benchmark: warm up, then time `config.iterations` runs and
    /// average them.
    ///
    /// # Errors
    ///
    /// Fails when `config.iterations` is zero (an average cannot be formed) or
    /// when the operation itself fails.
    fn run_single_benchmark(
        &self,
        operation: &str,
        dataset: &BenchmarkDataset,
    ) -> ParseResult<BenchmarkResult> {
        let iterations = self.config.iterations;
        if iterations == 0 {
            // Dividing the accumulated duration by zero would panic.
            return Err(SarifError::custom(format!(
                "Cannot benchmark operation '{}': iterations must be greater than zero",
                operation
            )));
        }

        // Warmup iterations (not measured)
        for _ in 0..self.config.warmup_iterations {
            self.execute_operation(operation, dataset)?;
        }

        // Measured iterations
        let mut total_duration = Duration::ZERO;
        let mut memory_usage = 0.0;
        for _ in 0..iterations {
            let start_memory = self.get_memory_usage();
            let start_time = Instant::now();

            self.execute_operation(operation, dataset)?;

            let duration = start_time.elapsed();
            let end_memory = self.get_memory_usage();

            total_duration += duration;
            memory_usage += end_memory - start_memory;
        }

        let avg_duration = total_duration / iterations as u32;
        let avg_memory = memory_usage / iterations as f64;
        let avg_seconds = avg_duration.as_secs_f64();

        let throughput = ThroughputMetrics {
            results_per_second: dataset.characteristics.result_count as f64 / avg_seconds,
            files_per_second: dataset.characteristics.file_count as f64 / avg_seconds,
            mb_per_second: dataset.characteristics.size_mb / avg_seconds,
        };

        Ok(BenchmarkResult {
            operation: operation.to_string(),
            dataset: dataset.name.clone(),
            duration: avg_duration,
            memory_usage_mb: avg_memory,
            throughput,
            custom_metrics: HashMap::new(),
        })
    }

    /// Execute a specific operation on a dataset.
    ///
    /// Supported operation names are `"parse"`, `"index"`, `"query"` and
    /// `"conversion"`; anything else yields an error.
    fn execute_operation(&self, operation: &str, dataset: &BenchmarkDataset) -> ParseResult<()> {
        match operation {
            "parse" => {
                // Simulate parsing by serializing and deserializing
                let json = serde_json::to_string(&dataset.log)
                    .map_err(|e| SarifError::custom(format!("Serialization error: {}", e)))?;
                let _: SarifLog = serde_json::from_str(&json)
                    .map_err(|e| SarifError::custom(format!("Deserialization error: {}", e)))?;
            }
            "index" => {
                // Simulate indexing
                let index = crate::utils::indexing::SarifIndex::from_sarif_log(&dataset.log);
                // Access some index data to ensure it's built
                let _ = index.stats.result_count;
            }
            "query" => {
                // Simulate querying
                let index = crate::utils::indexing::SarifIndex::from_sarif_log(&dataset.log);
                let executor =
                    crate::utils::query::SarifQueryExecutor::from_index(index, dataset.log.clone());
                let query = crate::utils::query::SarifQuery::default();
                let _ = executor.execute(&query)?;
            }
            "conversion" => {
                // Simulate conversion
                let converter = crate::utils::conversion::CsvConverter::new();
                let _ = converter.convert_to_csv(&dataset.log)?;
            }
            _ => {
                return Err(SarifError::custom(format!(
                    "Unknown operation: {}",
                    operation
                )));
            }
        }
        Ok(())
    }

    /// Get current memory usage (simplified implementation).
    ///
    /// Placeholder: always returns `0.0`, so every memory figure derived from
    /// it is zero until a real measurement is wired in.
    fn get_memory_usage(&self) -> f64 {
        // In a real implementation, this would use system APIs to get actual memory usage
        // For now, return a placeholder value
        0.0
    }

    /// Generate a comprehensive benchmark report from the stored results.
    pub fn generate_report(&self) -> BenchmarkReport {
        BenchmarkReport {
            results: self.results.clone(),
            summary: self.generate_summary(),
            comparisons: self.generate_comparisons(),
        }
    }

    /// Aggregate the stored results per operation (averaged across datasets).
    fn generate_summary(&self) -> BenchmarkSummary {
        let mut operation_summaries = HashMap::new();

        for result in &self.results {
            let summary = operation_summaries
                .entry(result.operation.clone())
                .or_insert_with(|| OperationSummary {
                    operation: result.operation.clone(),
                    total_runs: 0,
                    avg_duration: Duration::ZERO,
                    avg_throughput: 0.0,
                    avg_memory_mb: 0.0,
                });

            // The avg_* fields hold running sums until the division below.
            summary.total_runs += 1;
            summary.avg_duration += result.duration;
            summary.avg_throughput += result.throughput.results_per_second;
            summary.avg_memory_mb += result.memory_usage_mb;
        }

        // Calculate averages; every summary was created from at least one
        // result, so `total_runs` is never zero here.
        for summary in operation_summaries.values_mut() {
            summary.avg_duration /= summary.total_runs as u32;
            summary.avg_throughput /= summary.total_runs as f64;
            summary.avg_memory_mb /= summary.total_runs as f64;
        }

        BenchmarkSummary {
            operation_summaries,
            total_benchmarks: self.results.len(),
            fastest_operation: self.find_fastest_operation(),
            slowest_operation: self.find_slowest_operation(),
        }
    }

    /// Generate comparisons between different datasets for the same operation.
    ///
    /// Every unordered pair of results sharing an operation yields one entry,
    /// with the earlier result acting as the baseline.
    fn generate_comparisons(&self) -> Vec<BenchmarkComparison> {
        let mut comparisons = Vec::new();

        for operation in &self.config.operations {
            let operation_results: Vec<_> = self
                .results
                .iter()
                .filter(|r| r.operation == *operation)
                .collect();

            for (i, baseline) in operation_results.iter().enumerate() {
                for comparison in &operation_results[i + 1..] {
                    comparisons.push(BenchmarkComparison {
                        operation: operation.clone(),
                        baseline_dataset: baseline.dataset.clone(),
                        comparison_dataset: comparison.dataset.clone(),
                        duration_ratio: Self::ratio(
                            comparison.duration.as_secs_f64(),
                            baseline.duration.as_secs_f64(),
                        ),
                        throughput_ratio: Self::ratio(
                            comparison.throughput.results_per_second,
                            baseline.throughput.results_per_second,
                        ),
                        memory_ratio: Self::ratio(
                            comparison.memory_usage_mb,
                            baseline.memory_usage_mb,
                        ),
                    });
                }
            }
        }

        comparisons
    }

    /// Compute `comparison / baseline`.
    ///
    /// Two zero values are treated as equal (ratio `1.0`) instead of yielding
    /// NaN; this matters because the placeholder memory measurement reports
    /// zero for every result.
    fn ratio(comparison: f64, baseline: f64) -> f64 {
        if comparison == 0.0 && baseline == 0.0 {
            1.0
        } else {
            comparison / baseline
        }
    }

    /// Operation of the single stored result with the shortest duration.
    fn find_fastest_operation(&self) -> Option<String> {
        self.results
            .iter()
            .min_by_key(|r| r.duration)
            .map(|r| r.operation.clone())
    }

    /// Operation of the single stored result with the longest duration.
    fn find_slowest_operation(&self) -> Option<String> {
        self.results
            .iter()
            .max_by_key(|r| r.duration)
            .map(|r| r.operation.clone())
    }
}

/// Comprehensive benchmark report
///
/// Produced by `SarifBenchmark::generate_report` from the stored results.
#[derive(Debug, Clone)]
pub struct BenchmarkReport {
    /// Individual benchmark results (a copy of the suite's stored results)
    pub results: Vec<BenchmarkResult>,

    /// Summary statistics
    pub summary: BenchmarkSummary,

    /// Performance comparisons (pairs of results for the same operation)
    pub comparisons: Vec<BenchmarkComparison>,
}

/// Summary of benchmark results
#[derive(Debug, Clone)]
pub struct BenchmarkSummary {
    /// Summary by operation, keyed by operation name
    pub operation_summaries: HashMap<String, OperationSummary>,

    /// Total number of benchmarks run (number of individual results)
    pub total_benchmarks: usize,

    /// Fastest operation overall: the operation of the individual result with
    /// the shortest duration; `None` when there are no results
    pub fastest_operation: Option<String>,

    /// Slowest operation overall: the operation of the individual result with
    /// the longest duration; `None` when there are no results
    pub slowest_operation: Option<String>,
}

/// Summary for a specific operation
///
/// Aggregates all benchmark results that share one operation name.
#[derive(Debug, Clone)]
pub struct OperationSummary {
    /// Operation name
    pub operation: String,

    /// Number of benchmark runs (results aggregated into this summary)
    pub total_runs: usize,

    /// Average duration across all runs
    pub avg_duration: Duration,

    /// Average throughput across all runs (results per second)
    pub avg_throughput: f64,

    /// Average memory usage across all runs
    pub avg_memory_mb: f64,
}

/// Comparison between benchmark results
///
/// Relates two results of the same operation obtained on different datasets.
#[derive(Debug, Clone)]
pub struct BenchmarkComparison {
    /// Operation being compared
    pub operation: String,

    /// Baseline dataset (name)
    pub baseline_dataset: String,

    /// Comparison dataset (name)
    pub comparison_dataset: String,

    /// Ratio of durations (comparison / baseline)
    pub duration_ratio: f64,

    /// Ratio of throughputs (comparison / baseline), based on results per second
    pub throughput_ratio: f64,

    /// Ratio of memory usage (comparison / baseline)
    pub memory_ratio: f64,
}

// Default implementations

impl Default for PerformanceConfig {
    /// Defaults: memory tracking, timing and parallelism all enabled, a 1 GB
    /// memory threshold, batches of 1000, and the default cache limits.
    fn default() -> Self {
        // 1GB, expressed in MB
        let memory_threshold_mb = 1024.0;
        let cache_config = CacheConfig::default();

        Self {
            track_memory: true,
            enable_timing: true,
            memory_threshold_mb,
            batch_size: 1000,
            enable_parallel: true,
            cache_config,
        }
    }
}

impl Default for CacheConfig {
    /// Defaults: 10 logs, 100 000 indexed results, 10 000 query results and a
    /// one-hour TTL.
    fn default() -> Self {
        // 1 hour
        const DEFAULT_TTL_SECONDS: u64 = 3600;

        Self {
            max_logs: 10,
            max_indexed_results: 100_000,
            max_query_results: 10_000,
            ttl_seconds: DEFAULT_TTL_SECONDS,
        }
    }
}

impl Default for BenchmarkConfig {
    /// Defaults: 10 measured iterations after 3 warmups, memory profiling on,
    /// and all four operations (`parse`, `index`, `query`, `conversion`).
    fn default() -> Self {
        let operations = ["parse", "index", "query", "conversion"]
            .iter()
            .map(|name| name.to_string())
            .collect();

        Self {
            iterations: 10,
            warmup_iterations: 3,
            profile_memory: true,
            operations,
        }
    }
}

/// Create a test dataset for benchmarking
///
/// Builds a SARIF log with `run_count` runs of `results_per_run` results each.
/// Within a run, results cycle through at most 10 file paths
/// (`test/file0.rs` … `test/file9.rs`), 50 rule ids (`RULE000` … `RULE049`)
/// and line numbers 1–100. The returned characteristics describe what was
/// actually generated, so small datasets report fewer than 10 files or 50 rules.
pub fn create_test_dataset(
    name: &str,
    run_count: usize,
    results_per_run: usize,
) -> BenchmarkDataset {
    use crate::builder::SarifLogBuilder;

    // Number of distinct file paths / rule ids the generator cycles through.
    const DISTINCT_FILES: usize = 10;
    const DISTINCT_RULES: usize = 50;

    let mut log_builder = SarifLogBuilder::new();

    for run_idx in 0..run_count {
        let tool_name = format!("test-tool-{}", run_idx);
        let mut run_builder = crate::builder::RunBuilder::with_tool(&tool_name, Some("1.0.0"));

        for result_idx in 0..results_per_run {
            let message = format!("Test result {} from run {}", result_idx, run_idx);
            let file_path = format!("test/file{}.rs", result_idx % DISTINCT_FILES);
            let line = (result_idx % 100) as i32 + 1;

            let result = crate::builder::ResultBuilder::with_text_message(&message)
                .with_rule_id(format!("RULE{:03}", result_idx % DISTINCT_RULES))
                .add_file_location(&file_path, line, 1)
                .build();

            run_builder = run_builder.add_result(result);
        }

        log_builder = log_builder.add_run(run_builder.build());
    }

    let log = log_builder.build_unchecked();

    let result_count = run_count * results_per_run;
    // Every run reuses the same paths and rule ids, so the number of distinct
    // values is bounded by the results in one run — and is zero without runs.
    let (file_count, rule_count) = if run_count == 0 {
        (0, 0)
    } else {
        (
            results_per_run.min(DISTINCT_FILES),
            results_per_run.min(DISTINCT_RULES),
        )
    };
    let avg_results_per_file = if file_count == 0 {
        0.0
    } else {
        result_count as f64 / file_count as f64
    };

    let characteristics = DatasetCharacteristics {
        run_count,
        result_count,
        file_count,
        rule_count,
        avg_results_per_file,
        size_mb: estimate_log_size(&log),
    };

    BenchmarkDataset {
        name: name.to_string(),
        log,
        characteristics,
    }
}

/// Estimate the size of a SARIF log in MB
///
/// The estimate is the byte length of the log's compact JSON serialization
/// divided by 1024 * 1024. A log that fails to serialize is reported as `0.0`.
fn estimate_log_size(log: &SarifLog) -> f64 {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    serde_json::to_string(log)
        .map(|json| json.len() as f64 / BYTES_PER_MB)
        .unwrap_or(0.0)
}

// Unit tests for the monitoring, pooling, interning, caching and benchmark
// helpers defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // Timing handle, counters and memory updates all show up in the report.
    #[test]
    fn test_performance_monitor() {
        let config = PerformanceConfig::default();
        let mut monitor = PerformanceMonitor::new(config);

        // Test timing: the handle records its sample when the inner scope ends
        {
            let _handle = monitor.start_timing("test_operation");
            std::thread::sleep(std::time::Duration::from_millis(10));
        }

        // Test counter
        monitor.increment_counter("test_counter");
        monitor.increment_counter("test_counter");

        // Test memory tracking
        monitor.update_memory_stats("test_operation", 100.0);

        let report = monitor.generate_report();
        assert_eq!(report.counters["test_counter"], 2);
        assert!(report.operation_stats.contains_key("test_operation"));
        assert_eq!(report.memory_stats.current_memory_mb, 100.0);
    }

    // Borrow/return round trip is reflected in the pool counters.
    #[test]
    fn test_memory_pool() {
        let mut pool = MemoryPool::new(|| String::new(), 5);

        // Borrow and return objects
        let obj1 = pool.borrow();
        let obj2 = pool.borrow();

        pool.return_object(obj1);
        pool.return_object(obj2);

        let stats = pool.get_stats();
        assert_eq!(stats.borrowed.load(Ordering::Relaxed), 2);
        assert_eq!(stats.returned.load(Ordering::Relaxed), 2);
    }

    // Interning the same text twice yields the same allocation.
    #[test]
    fn test_string_interner() {
        let mut interner = StringInterner::new();

        let str1 = interner.intern("test");
        let str2 = interner.intern("test");

        assert!(Arc::ptr_eq(&str1, &str2));

        let stats = interner.get_stats();
        assert_eq!(stats.total_interned, 1);
        assert_eq!(stats.cache_hits, 1);
    }

    // The second lookup must not invoke its closure and must count as a hit.
    #[test]
    fn test_computation_cache() {
        let mut cache = ComputationCache::new();

        // Test fingerprint caching
        let fp1 = cache.get_fingerprint("key1", || "fingerprint1".to_string());
        let fp2 = cache.get_fingerprint("key1", || "fingerprint2".to_string());

        assert_eq!(fp1, "fingerprint1");
        assert_eq!(fp2, "fingerprint1"); // Should return cached value

        let stats = cache.get_stats();
        assert_eq!(stats.hits_by_type.get("fingerprint"), Some(&1));
        assert_eq!(stats.misses_by_type.get("fingerprint"), Some(&1));
    }

    // Generated dataset has the requested number of runs and results.
    #[test]
    fn test_benchmark_dataset_creation() {
        let dataset = create_test_dataset("test", 2, 10);

        assert_eq!(dataset.name, "test");
        assert_eq!(dataset.characteristics.run_count, 2);
        assert_eq!(dataset.characteristics.result_count, 20);
        assert_eq!(dataset.log.runs.len(), 2);
    }

    // One operation on one dataset produces exactly one result.
    #[test]
    fn test_benchmark_execution() {
        let config = BenchmarkConfig {
            iterations: 2,
            warmup_iterations: 1,
            profile_memory: false,
            operations: vec!["parse".to_string()],
        };

        let mut benchmark = SarifBenchmark::new(config);
        let dataset = create_test_dataset("small", 1, 5);
        benchmark.add_dataset(dataset);

        let results = benchmark.run_benchmarks().unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].operation, "parse");
        assert_eq!(results[0].dataset, "small");
    }
}