#![allow(clippy::all)]
#![allow(unused_variables)]
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::time::sleep;
use trustformers::pipeline::{
AdaptiveBatchOptimizer, AdvancedCacheConfig, AdvancedLRUCache, DynamicBatcher,
DynamicBatchingConfig,
};
use trustformers::{pipeline, MemoryPool, MemoryPoolConfig, Profiler, ProfilerConfig, Result};
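// Walks through the dynamic batching and performance tooling: dynamic
// batching, adaptive batch sizing, advanced caching, memory pooling,
// profiling, and a small load test, each in its own example function below.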
#[tokio::main]
async fn main() -> Result<()> {
println!("⚡ TrustformeRS Dynamic Batching and Performance Examples\n");
dynamic_batching_example().await?;
adaptive_batch_sizing_example().await?;
advanced_caching_example().await?;
memory_pool_example().await?;
performance_profiling_example().await?;
load_testing_example().await?;
println!("\n✅ All dynamic batching examples completed successfully!");
Ok(())
}
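/// Configures a `DynamicBatcher`, simulates a burst and a steady stream of
/// classification requests, and reports illustrative batching statistics.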
async fn dynamic_batching_example() -> Result<()> {
println!("📦 Dynamic Batching Example");
println!("===========================");
let batch_config = DynamicBatchingConfig {
initial_batch_size: 16,
min_batch_size: 4,
max_batch_size: 32,
target_latency_ms: 100,
max_wait_time_ms: 100,
throughput_threshold: 10.0,
performance_window_size: 10,
adjustment_factor: 1.2,
};
println!("Batching Configuration:");
println!(" Max batch size: {}", batch_config.max_batch_size);
println!(" Max wait time: {}ms", batch_config.max_wait_time_ms);
println!(" Initial size: {}", batch_config.initial_batch_size);
println!(" Target latency: {}ms", batch_config.target_latency_ms);
let pipeline = Arc::new(pipeline(
"text-classification",
Some("distilbert-base-uncased-finetuned-sst-2-english"),
None,
)?);
let batcher: DynamicBatcher<String> = DynamicBatcher::new(batch_config.clone());
println!("\nSimulating request patterns:");
println!("1. Burst pattern (10 requests in quick succession):");
let burst_requests: Vec<String> =
(0..10).map(|i| format!("Burst request {} for classification", i)).collect();
let start = Instant::now();
let mut tasks = Vec::new();
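// The requests are simulated: each spawned task returns a canned result
// directly instead of being routed through the batcher.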
for request in burst_requests {
let batcher_clone: DynamicBatcher<String> = DynamicBatcher::new(batch_config.clone());
let task: tokio::task::JoinHandle<Result<String>> = tokio::spawn(async move {
Ok("classification_result".to_string())
});
tasks.push(task);
}
for task in tasks {
let _result = task.await;
}
let burst_time = start.elapsed();
println!(" Burst processing time: {:?}", burst_time);
println!("2. Steady stream (1 request per 50ms):");
let start = Instant::now();
for i in 0..8 {
let request = format!("Steady request {} for classification", i);
let _result: Result<String> = Ok("classification_result".to_string());
sleep(Duration::from_millis(50)).await;
}
let steady_time = start.elapsed();
println!(" Steady processing time: {:?}", steady_time);
let stats = BatchingStats {
total_requests: 18,
total_batches: 4,
average_batch_size: 4.5,
average_wait_time: Duration::from_millis(45),
throughput: 32.5,
};
println!("\nBatching Statistics:");
println!(" Total requests processed: {}", stats.total_requests);
println!(" Total batches: {}", stats.total_batches);
println!(" Average batch size: {:.1}", stats.average_batch_size);
println!(" Average wait time: {:?}", stats.average_wait_time);
println!(" Throughput: {:.1} req/sec", stats.throughput);
Ok(())
}
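/// Compares latency, memory, and throughput across sample batch sizes and
/// derives latency-, throughput-, and memory-optimal choices from the data.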
async fn adaptive_batch_sizing_example() -> Result<()> {
println!("🧠 Adaptive Batch Sizing Example");
println!("================================");
let optimizer_config = trustformers::pipeline::AdaptiveBatchConfig::default();
let optimizer = AdaptiveBatchOptimizer::new(optimizer_config);
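// The optimizer is constructed for illustration only; the analysis below
// works directly on the sample measurements.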
let performance_data = vec![
(1, Duration::from_millis(50), 1024),
(4, Duration::from_millis(80), 4096),
(8, Duration::from_millis(120), 8192),
(16, Duration::from_millis(180), 16384),
(32, Duration::from_millis(300), 32768),
(64, Duration::from_millis(550), 65536),
];
println!("Performance Analysis:");
println!(" Batch Size | Latency | Memory | Throughput");
println!(" -----------|----------|---------|------------");
for (batch_size, latency, memory) in &performance_data {
let throughput = *batch_size as f64 / latency.as_secs_f64();
println!(
" {:9} | {:7}ms | {:6}KB | {:8.1} req/s",
batch_size,
latency.as_millis(),
memory / 1024,
throughput
);
}
println!("\nOptimal Batch Sizes:");
let latency_optimal = performance_data
.iter()
.min_by_key(|(size, latency, _)| latency.as_millis() / *size as u128)
.expect("Queue should be created");
println!(
" Latency-optimized: {} ({}ms per request)",
latency_optimal.0,
latency_optimal.1.as_millis() / latency_optimal.0 as u128
);
let throughput_optimal = performance_data
.iter()
.max_by(|(size1, latency1, _), (size2, latency2, _)| {
let throughput1 = *size1 as f64 / latency1.as_secs_f64();
let throughput2 = *size2 as f64 / latency2.as_secs_f64();
throughput1.partial_cmp(&throughput2).expect("Values should be comparable")
})
.expect("Batch result should be available");
let max_throughput = throughput_optimal.0 as f64 / throughput_optimal.1.as_secs_f64();
println!(
" Throughput-optimized: {} ({:.1} req/s)",
throughput_optimal.0, max_throughput
);
let memory_optimal = performance_data
.iter()
.min_by_key(|(size, _, memory)| memory / *size as usize)
.expect("Statistics should be available");
println!(
" Memory-optimized: {} ({}KB per request)",
memory_optimal.0,
memory_optimal.2 / 1024 / memory_optimal.0
);
println!("\nAdaptive Selection Scenarios:");
let scenarios = vec![
("Low load (< 10 req/s)", 4, "Optimize for latency"),
(
"Medium load (10-50 req/s)",
16,
"Balance latency and throughput",
),
("High load (> 50 req/s)", 32, "Optimize for throughput"),
("Memory constrained", 8, "Optimize for memory efficiency"),
];
for (scenario, recommended_size, reason) in scenarios {
println!(" {}: {} ({})", scenario, recommended_size, reason);
}
Ok(())
}
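/// Builds an `AdvancedLRUCache`, simulates hot, cold, and priority access
/// patterns, and reports cache statistics and tuning recommendations.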
async fn advanced_caching_example() -> Result<()> {
println!("🗃️ Advanced Caching Example");
println!("============================");
let cache_config = AdvancedCacheConfig {
max_entries: 1000,
max_memory_bytes: 1024 * 1024 * 100, // 100 MB
ttl_seconds: 300,
cleanup_interval_seconds: 60,
lru_eviction_threshold: 0.8,
smart_eviction_threshold: 0.9,
enable_hit_rate_tracking: true,
enable_memory_pressure_monitoring: true,
enable_access_pattern_analysis: true,
};
println!("Cache Configuration:");
println!(" Max entries: {}", cache_config.max_entries);
println!(
" Max memory: {} MB",
cache_config.max_memory_bytes / 1024 / 1024
);
println!(" TTL: {} seconds", cache_config.ttl_seconds);
println!(
" Hit rate tracking: {}",
cache_config.enable_hit_rate_tracking
);
println!(
" Memory pressure monitoring: {}",
cache_config.enable_memory_pressure_monitoring
);
let cache: AdvancedLRUCache<String> = AdvancedLRUCache::new(cache_config.clone());
println!("\nCaching Patterns Simulation:");
println!("1. Hot data pattern:");
let hot_keys = vec!["model_bert", "config_default", "tokenizer_fast"];
for _ in 0..10 {
for key in &hot_keys {
cache.get(key);
}
}
println!("2. Cold data pattern:");
for i in 0..50 {
let cold_key = format!("temp_data_{}", i);
let _ = format!("temporary value {}", i);
}
println!("3. Priority-based caching:");
let priority_items = vec![
("critical_model", "high"),
("user_data", "medium"),
("temp_cache", "low"),
];
for (key, priority) in priority_items {
let _ = format!("{} data with priority {}", key, priority);
}
let stats = cache.get_stats();
println!("\nCache Statistics:");
println!(" Total entries: {}", stats.total_entries);
println!(" Hit rate: {:.1}%", stats.hit_rate * 100.0);
println!(" Miss rate: {:.1}%", stats.miss_rate * 100.0);
println!(" Total memory: {} KB", stats.total_memory_bytes / 1024);
println!(" Evictions: {}", stats.eviction_count);
println!(" Cleanups: {}", stats.cleanup_count);
println!("\nCache Efficiency Analysis:");
println!(" Hit rate: {:.1}%", stats.hit_rate * 100.0);
if stats.hit_rate > 0.8 {
println!(" Status: Excellent cache performance");
} else if stats.hit_rate > 0.6 {
println!(" Status: Good cache performance");
} else {
println!(" Status: Cache needs optimization");
}
println!("\nOptimization Recommendations:");
if stats.eviction_count > 100 {
println!(" ⚠️ High eviction rate - consider increasing cache size");
}
if stats.total_memory_bytes > cache_config.max_memory_bytes {
println!(" ⚠️ High memory usage - reduce TTL or increase memory limit");
}
println!(" ✓ Cache is performing within expected parameters");
Ok(())
}
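/// Configures a `MemoryPool`, exercises large, small, and mixed allocation
/// patterns, and prints usage, fragmentation, and garbage-collection stats.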
async fn memory_pool_example() -> Result<()> {
println!("🧠 Memory Pool Optimization Example");
println!("===================================");
let mut pool_config = MemoryPoolConfig::default();
pool_config.initial_size = 1024 * 1024 * 64; // 64 MB
pool_config.max_size = 1024 * 1024 * 512; // 512 MB
pool_config.alignment = 1024 * 4; // 4 KB
pool_config.enable_gc = true;
pool_config.gc_threshold = 0.7;
pool_config.enable_tracking = true;
println!("Memory Pool Configuration:");
println!(
" Initial size: {} MB",
pool_config.initial_size / 1024 / 1024
);
println!(" Max size: {} MB", pool_config.max_size / 1024 / 1024);
println!(" Alignment: {} KB", pool_config.alignment / 1024);
println!(" Garbage collection enabled: {}", pool_config.enable_gc);
let memory_pool = MemoryPool::new(pool_config)?;
println!("\nMemory Allocation Patterns:");
println!("1. Large tensor allocations:");
let mut large_tensors = Vec::new();
for i in 0..5 {
let size = 1024 * 1024 * 8; // 8 MB per tensor
let _ptr = memory_pool.allocate(size)?;
large_tensors.push(format!("large_tensor_{}", i));
}
let stats = memory_pool.get_stats();
println!(" Allocated: {} MB", stats.total_allocated / 1024 / 1024);
println!("2. Small frequent allocations:");
let mut small_allocations = Vec::new();
for i in 0..100 {
let size = 1024 * 16; // 16 KB per allocation
let _ptr = memory_pool.allocate(size)?;
small_allocations.push(format!("small_alloc_{}", i));
}
let stats = memory_pool.get_stats();
println!(" Total requests: {}", stats.total_requests);
println!(" Current usage: {} MB", stats.current_usage / 1024 / 1024);
println!("3. Mixed allocation pattern:");
let mixed_sizes = vec![
1024,            // 1 KB
1024 * 64,       // 64 KB
1024 * 512,      // 512 KB
1024 * 1024 * 2, // 2 MB
];
for (i, size) in mixed_sizes.iter().enumerate() {
let _ = memory_pool.allocate(*size)?;
}
let stats = memory_pool.get_stats();
println!("\nMemory Statistics:");
println!(
" Total allocated: {} MB",
stats.total_allocated / 1024 / 1024
);
println!(" Peak usage: {} MB", stats.peak_usage / 1024 / 1024);
println!(" Fragmentation: {:.1}%", stats.fragmentation_ratio * 100.0);
println!(" Total requests: {}", stats.total_requests);
println!(" Cache hits: {}", stats.cache_hits);
println!("\nMemory management completed:");
let final_stats = memory_pool.get_stats();
println!(
" Final memory usage: {} MB",
final_stats.current_usage / 1024 / 1024
);
println!(" Garbage collection runs: {}", final_stats.gc_runs);
Ok(())
}
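/// Profiles single and batched inference through `Profiler` sessions and
/// compares per-item latency between the two.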
async fn performance_profiling_example() -> Result<()> {
println!("📊 Performance Profiling Example");
println!("================================");
let profiler_config = ProfilerConfig {
auto_enable: true,
enable_memory: true,
enable_advisor: true,
enable_benchmarks: false,
max_sessions: 10,
output_dir: None,
auto_save: false,
};
println!("Profiler Configuration:");
println!(" Auto enable: {}", profiler_config.auto_enable);
println!(" Memory tracking: {}", profiler_config.enable_memory);
println!(" Advisor enabled: {}", profiler_config.enable_advisor);
let profiler = Profiler::with_config(profiler_config)?;
let pipeline = pipeline(
"text-classification",
Some("distilbert-base-uncased-finetuned-sst-2-english"),
None,
)?;
println!("\nProfiling workload patterns:");
println!("1. Single inference profiling:");
profiler.start_session("single_inference")?;
let start = Instant::now();
let _result = pipeline.__call__("Test sentence for profiling".to_string())?;
let duration = start.elapsed();
profiler.end_session("single_inference")?;
println!(" Duration: {:?}", duration);
println!("2. Batch inference profiling:");
let batch_inputs: Vec<String> =
(0..16).map(|i| format!("Batch test sentence number {}", i)).collect();
profiler.start_session("batch_inference")?;
let start = Instant::now();
let _results = pipeline.batch(batch_inputs)?;
let batch_duration = start.elapsed();
profiler.end_session("batch_inference")?;
println!(" Batch duration: {:?}", batch_duration);
println!(" Per-item time: {:?}", batch_duration / 16);
println!(
" Batch efficiency: {:.1}x",
(duration.as_nanos() * 16) as f64 / batch_duration.as_nanos() as f64
);
println!("3. Memory usage profiling:");
println!(" Memory profiling integrated with session tracking");
println!("\nProfiling Summary:");
println!(" Profiling completed through session management");
Ok(())
}
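/// Runs a small concurrent load test against the classification pipeline and
/// reports throughput, success rate, and response-time percentiles.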
async fn load_testing_example() -> Result<()> {
println!("🔥 Load Testing Example");
println!("======================");
let pipeline = Arc::new(pipeline(
"text-classification",
Some("distilbert-base-uncased-finetuned-sst-2-english"),
None,
)?);
let concurrent_users = 10;
let requests_per_user = 5;
let total_requests = concurrent_users * requests_per_user;
println!("Load Test Configuration:");
println!(" Concurrent users: {}", concurrent_users);
println!(" Requests per user: {}", requests_per_user);
println!(" Total requests: {}", total_requests);
let test_inputs: Vec<String> = (0..total_requests)
.map(|i| format!("Load test sentence number {} for stress testing", i))
.collect();
println!("\nRunning load test...");
let start = Instant::now();
let mut tasks = Vec::new();
for chunk in test_inputs.chunks(requests_per_user) {
let pipeline_clone = pipeline.clone();
let chunk_vec = chunk.to_vec();
let task = tokio::spawn(async move {
let mut results = Vec::new();
for input in chunk_vec {
let start = Instant::now();
match pipeline_clone.__call__(input) {
Ok(_result) => {
results.push((start.elapsed(), true));
},
Err(_) => {
results.push((start.elapsed(), false));
},
}
}
results
});
tasks.push(task);
}
let mut all_results = Vec::new();
for task in tasks {
if let Ok(results) = task.await {
all_results.extend(results);
}
}
let total_time = start.elapsed();
println!("\nLoad Test Results:");
let successful_requests = all_results.iter().filter(|(_, success)| *success).count();
let failed_requests = all_results.len() - successful_requests;
println!(" Total time: {:?}", total_time);
println!(
" Successful requests: {}/{}",
successful_requests, total_requests
);
println!(" Failed requests: {}", failed_requests);
println!(
" Success rate: {:.1}%",
successful_requests as f64 / total_requests as f64 * 100.0
);
println!(
" Throughput: {:.1} req/sec",
total_requests as f64 / total_time.as_secs_f64()
);
let response_times: Vec<Duration> = all_results
.iter()
.filter(|(_, success)| *success)
.map(|(duration, _)| *duration)
.collect();
if !response_times.is_empty() {
let mut sorted_times = response_times.clone();
sorted_times.sort();
let avg_time = response_times.iter().sum::<Duration>() / response_times.len() as u32;
let p50 = sorted_times[sorted_times.len() / 2];
let p95 = sorted_times[(sorted_times.len() as f64 * 0.95) as usize];
let p99 = sorted_times[(sorted_times.len() as f64 * 0.99) as usize];
println!("\nResponse Time Analysis:");
println!(" Average: {:?}", avg_time);
println!(" P50 (median): {:?}", p50);
println!(" P95: {:?}", p95);
println!(" P99: {:?}", p99);
println!(" Min: {:?}", sorted_times[0]);
println!(" Max: {:?}", sorted_times[sorted_times.len() - 1]);
}
Ok(())
}
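// Report structs for the examples above. Only `BatchingStats` is constructed
// in this file; the others sketch the shape of the data these tools report.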
#[derive(Debug)]
pub struct BatchingStats {
pub total_requests: usize,
pub total_batches: usize,
pub average_batch_size: f64,
pub average_wait_time: Duration,
pub throughput: f64,
}
#[derive(Debug)]
pub struct CacheStats {
pub total_requests: usize,
pub hits: usize,
pub misses: usize,
pub current_size: usize,
pub memory_usage: usize,
pub evictions: usize,
pub hit_ratio: f64,
}
#[derive(Debug)]
pub struct MemorySnapshot {
pub total_memory: usize,
pub model_memory: usize,
pub cache_memory: usize,
pub peak_memory: usize,
}
#[derive(Debug)]
pub struct ProfileResults {
pub total_sessions: usize,
pub average_duration: Duration,
pub memory_efficiency: f64,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_batching_stats() {
let stats = BatchingStats {
total_requests: 100,
total_batches: 10,
average_batch_size: 10.0,
average_wait_time: Duration::from_millis(50),
throughput: 20.0,
};
assert_eq!(stats.total_requests, 100);
assert_eq!(stats.average_batch_size, 10.0);
}
}