amari 0.19.1 - Docs.rs

//! Edge Computing Integration Tests - TDD Phase 3
//!
//! Tests for automatic device dispatch, performance optimization,
//! and cross-platform edge computing capabilities.

use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
use amari_gpu::GpuInfoGeometry;
use amari_wasm::AmariEdgeCompute;
use std::time::{Instant, Duration};

/// Test automatic device selection based on workload size
#[tokio::test]
async fn test_automatic_device_dispatch() {
    let edge_dispatcher = EdgeComputeDispatcher::new().await
        .expect("Should initialize edge dispatcher");

    // Test different workload sizes
    let workloads = [
        (10, "cpu"),        // Small: CPU
        (1000, "gpu"),      // Medium: GPU
        (100000, "gpu"),    // Large: GPU with batching
    ];

    for (batch_size, expected_device) in workloads {
        let (x_batch, y_batch, z_batch) = create_tensor_batch(batch_size);

        let dispatch_decision = edge_dispatcher
            .analyze_workload(&x_batch, &y_batch, &z_batch)
            .await
            .expect("Should analyze workload");

        assert_eq!(dispatch_decision.device_type(), expected_device,
                  "Wrong device choice for batch size {}", batch_size);

        // Execute and verify results
        let results = edge_dispatcher
            .execute_tensor_computation(&x_batch, &y_batch, &z_batch)
            .await
            .expect("Computation should succeed");

        assert_eq!(results.len(), batch_size);

        // Verify performance characteristics
        let performance = dispatch_decision.performance_metrics();
        if batch_size >= 1000 {
            assert!(performance.throughput() > 1000.0, // computations per second
                   "GPU should provide high throughput for large batches");
        }
    }
}

/// Test cross-platform performance comparison
#[tokio::test]
async fn test_cross_platform_performance() {
    let platforms = vec![
        ("cpu", "Standard CPU computation"),
        ("webgpu", "WebGPU acceleration"),
        ("wasm", "WebAssembly optimized"),
        ("hybrid", "CPU+GPU hybrid"),
    ];

    let test_batch_size = 5000;
    let (x_batch, y_batch, z_batch) = create_tensor_batch(test_batch_size);

    let mut performance_results = Vec::new();

    for (platform, description) in platforms {
        if let Ok(compute_engine) = EdgeComputeEngine::new_for_platform(platform).await {
            let start_time = Instant::now();

            let results = compute_engine
                .amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
                .await
                .expect("Computation should succeed");

            let duration = start_time.elapsed();

            assert_eq!(results.len(), test_batch_size);

            // Verify correctness with spot checks
            for i in (0..test_batch_size).step_by(1000) {
                let cpu_result = amari_chentsov_tensor(&x_batch[i], &y_batch[i], &z_batch[i]);
                assert!((results[i] - cpu_result).abs() < 1e-10,
                       "Platform {} should produce correct results", platform);
            }

            performance_results.push((platform, duration, description));
            println!("{}: {} computations in {:?}", description, test_batch_size, duration);
        }
    }

    // Performance analysis
    performance_results.sort_by_key(|(_, duration, _)| *duration);

    if performance_results.len() >= 2 {
        let fastest = &performance_results[0];
        let slowest = &performance_results[performance_results.len() - 1];

        let speedup = slowest.1.as_secs_f64() / fastest.1.as_secs_f64();
        println!("Best speedup: {:.2}x ({} vs {})", speedup, fastest.0, slowest.0);

        // GPU/WebGPU should provide significant speedup for large batches
        if test_batch_size >= 1000 {
            assert!(speedup >= 2.0, "Should achieve at least 2x speedup with acceleration");
        }
    }
}

/// Test edge device memory constraints and optimization
#[tokio::test]
async fn test_edge_device_memory_optimization() {
    let edge_optimizer = EdgeMemoryOptimizer::new().await
        .expect("Should initialize memory optimizer");

    // Simulate different edge device memory constraints
    let memory_constraints = [
        (512_000_000, "mobile-device"),      // 512MB
        (2_000_000_000, "edge-server"),      // 2GB
        (8_000_000_000, "workstation"),      // 8GB
    ];

    for (memory_limit, device_type) in memory_constraints {
        edge_optimizer.set_memory_limit(memory_limit);

        // Find maximum batch size for this memory constraint
        let max_batch_size = edge_optimizer
            .find_optimal_batch_size_for_tensor_computation()
            .await
            .expect("Should find optimal batch size");

        println!("{}: max batch size {} for {}MB memory",
                device_type, max_batch_size, memory_limit / 1_000_000);

        // Test computation at this batch size
        let (x_batch, y_batch, z_batch) = create_tensor_batch(max_batch_size);

        let memory_before = edge_optimizer.get_memory_usage().await;

        let results = edge_optimizer
            .compute_tensor_batch_optimized(&x_batch, &y_batch, &z_batch)
            .await
            .expect("Optimized computation should succeed");

        let memory_after = edge_optimizer.get_memory_usage().await;

        assert_eq!(results.len(), max_batch_size);

        // Should not exceed memory limit
        assert!(memory_after <= memory_limit,
               "Memory usage should not exceed limit for {}", device_type);

        // Memory should be efficiently used
        let memory_efficiency = (memory_after - memory_before) as f64 / memory_limit as f64;
        assert!(memory_efficiency < 0.8, "Should use <80% of available memory");
    }
}

/// Test adaptive quality scaling based on device capabilities
#[tokio::test]
async fn test_adaptive_quality_scaling() {
    let quality_manager = AdaptiveQualityManager::new().await
        .expect("Should initialize quality manager");

    // Test different device capability levels
    let device_capabilities = [
        ("low-end", 0.25),    // 25% quality
        ("mid-range", 0.5),   // 50% quality
        ("high-end", 1.0),    // 100% quality
    ];

    let base_batch_size = 1000;

    for (device_class, quality_factor) in device_capabilities {
        quality_manager.set_device_class(device_class);

        let adapted_config = quality_manager
            .adapt_computation_config(base_batch_size, "amari-chentsov-tensor")
            .await
            .expect("Should adapt configuration");

        let expected_precision = match device_class {
            "low-end" => 1e-6,    // Reduced precision for speed
            "mid-range" => 1e-9,  // Standard precision
            "high-end" => 1e-12,  // Maximum precision
            _ => 1e-9,
        };

        assert_eq!(adapted_config.precision(), expected_precision);
        assert!(adapted_config.batch_size() <= (base_batch_size as f64 * quality_factor) as usize);

        // Test computation with adapted configuration
        let (x_batch, y_batch, z_batch) = create_tensor_batch(adapted_config.batch_size());

        let results = quality_manager
            .compute_with_adapted_quality(&x_batch, &y_batch, &z_batch, &adapted_config)
            .await
            .expect("Adaptive computation should work");

        assert_eq!(results.len(), adapted_config.batch_size());

        // Verify results meet the adapted precision requirements
        for i in 0..10.min(results.len()) {
            let reference = amari_chentsov_tensor(&x_batch[i], &y_batch[i], &z_batch[i]);
            assert!((results[i] - reference).abs() <= expected_precision * 10.0,
                   "Result should meet {} precision for {}", expected_precision, device_class);
        }
    }
}

/// Test edge computing fault tolerance and recovery
#[tokio::test]
async fn test_fault_tolerance_and_recovery() {
    let fault_tolerant_compute = FaultTolerantEdgeCompute::new().await
        .expect("Should initialize fault-tolerant compute");

    let test_batch_size = 1000;
    let (x_batch, y_batch, z_batch) = create_tensor_batch(test_batch_size);

    // Test various failure scenarios
    let failure_scenarios = [
        "gpu-memory-exhaustion",
        "webgpu-context-lost",
        "worker-thread-crash",
        "network-interruption",
    ];

    for scenario in failure_scenarios {
        println!("Testing fault tolerance for: {}", scenario);

        // Simulate failure
        fault_tolerant_compute.simulate_failure(scenario).await;

        // Computation should still succeed through fallback mechanisms
        let results = fault_tolerant_compute
            .compute_tensor_batch_with_recovery(&x_batch, &y_batch, &z_batch)
            .await
            .expect("Should recover from failure and complete computation");

        assert_eq!(results.len(), test_batch_size);

        // Verify results are still correct
        for i in (0..test_batch_size).step_by(100) {
            let reference = amari_chentsov_tensor(&x_batch[i], &y_batch[i], &z_batch[i]);
            assert!((results[i] - reference).abs() < 1e-9,
                   "Recovery should maintain computational correctness");
        }

        // Check that system recovered properly
        let recovery_status = fault_tolerant_compute.get_recovery_status().await;
        assert!(recovery_status.is_recovered(), "System should have recovered from {}", scenario);

        // Reset for next test
        fault_tolerant_compute.reset_failure_simulation().await;
    }
}

/// Test edge computing analytics and monitoring
#[tokio::test]
async fn test_edge_computing_analytics() {
    let analytics_engine = EdgeAnalyticsEngine::new().await
        .expect("Should initialize analytics engine");

    let test_runs = 10;
    let batch_size = 500;

    // Collect performance data over multiple runs
    for run in 0..test_runs {
        let (x_batch, y_batch, z_batch) = create_tensor_batch(batch_size);

        let _results = analytics_engine
            .compute_tensor_batch_with_analytics(&x_batch, &y_batch, &z_batch)
            .await
            .expect("Computation should succeed");

        // Add some variance to test analytics
        if run % 3 == 0 {
            tokio::time::sleep(Duration::from_millis(10)).await;
        }
    }

    // Analyze collected metrics
    let analytics = analytics_engine.get_performance_analytics().await
        .expect("Should get analytics");

    assert_eq!(analytics.total_computations(), test_runs * batch_size);
    assert!(analytics.average_computation_time() > Duration::from_nanos(1));
    assert!(analytics.throughput() > 0.0);

    // Performance should be consistent
    let coefficient_of_variation = analytics.std_deviation() / analytics.mean();
    assert!(coefficient_of_variation < 0.5, "Performance should be reasonably consistent");

    // Memory usage should be tracked
    let memory_stats = analytics.memory_statistics();
    assert!(memory_stats.peak_usage() > 0);
    assert!(memory_stats.average_usage() > 0);
    assert!(memory_stats.peak_usage() >= memory_stats.average_usage());

    // Device utilization should be tracked
    let device_stats = analytics.device_utilization();
    assert!(device_stats.gpu_utilization() >= 0.0);
    assert!(device_stats.gpu_utilization() <= 1.0);
}

// Helper types and functions for tests

struct EdgeComputeDispatcher {
    cpu_engine: Option<CpuComputeEngine>,
    gpu_engine: Option<GpuComputeEngine>,
    wasm_engine: Option<WasmComputeEngine>,
}

impl EdgeComputeDispatcher {
    async fn new() -> Result<Self, Box<dyn std::error::Error>> {
        Ok(Self {
            cpu_engine: Some(CpuComputeEngine::new()?),
            gpu_engine: GpuComputeEngine::new().await.ok(),
            wasm_engine: WasmComputeEngine::new().await.ok(),
        })
    }

    async fn analyze_workload(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        _y_batch: &[Multivector<3, 0, 0>],
        _z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<WorkloadDispatchDecision, Box<dyn std::error::Error>> {
        let batch_size = x_batch.len();

        // Simple heuristic for device selection
        let device_type = if batch_size < 100 {
            "cpu"
        } else if self.gpu_engine.is_some() {
            "gpu"
        } else {
            "cpu"
        };

        Ok(WorkloadDispatchDecision::new(device_type, batch_size))
    }

    async fn execute_tensor_computation(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, Box<dyn std::error::Error>> {
        let decision = self.analyze_workload(x_batch, y_batch, z_batch).await?;

        match decision.device_type() {
            "gpu" => {
                if let Some(ref gpu) = self.gpu_engine {
                    gpu.compute_tensor_batch(x_batch, y_batch, z_batch).await
                } else {
                    self.cpu_engine.as_ref().unwrap().compute_tensor_batch(x_batch, y_batch, z_batch)
                }
            }
            _ => {
                self.cpu_engine.as_ref().unwrap().compute_tensor_batch(x_batch, y_batch, z_batch)
            }
        }
    }
}

struct WorkloadDispatchDecision {
    device_type: String,
    batch_size: usize,
}

impl WorkloadDispatchDecision {
    fn new(device_type: &str, batch_size: usize) -> Self {
        Self {
            device_type: device_type.to_string(),
            batch_size,
        }
    }

    fn device_type(&self) -> &str {
        &self.device_type
    }

    fn performance_metrics(&self) -> PerformanceMetrics {
        // Simulated performance metrics
        let throughput = if self.device_type == "gpu" {
            self.batch_size as f64 * 2.0 // 2x speedup for GPU
        } else {
            self.batch_size as f64
        };

        PerformanceMetrics { throughput }
    }
}

struct PerformanceMetrics {
    throughput: f64,
}

impl PerformanceMetrics {
    fn throughput(&self) -> f64 {
        self.throughput
    }
}

// Placeholder implementations for test compilation
struct CpuComputeEngine;
struct GpuComputeEngine;
struct WasmComputeEngine;
struct EdgeComputeEngine;
struct EdgeMemoryOptimizer;
struct AdaptiveQualityManager;
struct FaultTolerantEdgeCompute;
struct EdgeAnalyticsEngine;

impl CpuComputeEngine {
    fn new() -> Result<Self, Box<dyn std::error::Error>> {
        Ok(Self)
    }

    fn compute_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, Box<dyn std::error::Error>> {
        let results: Vec<f64> = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }
}

impl GpuComputeEngine {
    async fn new() -> Result<Self, Box<dyn std::error::Error>> {
        // Try to initialize GPU, may fail
        match GpuInfoGeometry::new().await {
            Ok(_) => Ok(Self),
            Err(e) => Err(Box::new(e)),
        }
    }

    async fn compute_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, Box<dyn std::error::Error>> {
        // Placeholder: would use actual GPU computation
        let results: Vec<f64> = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }
}

impl WasmComputeEngine {
    async fn new() -> Result<Self, Box<dyn std::error::Error>> {
        Ok(Self)
    }
}

impl EdgeComputeEngine {
    async fn new_for_platform(_platform: &str) -> Result<Self, Box<dyn std::error::Error>> {
        Ok(Self)
    }

    async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, Box<dyn std::error::Error>> {
        let results: Vec<f64> = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }
}

fn create_tensor_batch(size: usize) -> (Vec<Multivector<3, 0, 0>>, Vec<Multivector<3, 0, 0>>, Vec<Multivector<3, 0, 0>>) {
    let mut x_batch = Vec::with_capacity(size);
    let mut y_batch = Vec::with_capacity(size);
    let mut z_batch = Vec::with_capacity(size);

    for i in 0..size {
        let mut x = Multivector::zero();
        let mut y = Multivector::zero();
        let mut z = Multivector::zero();

        x.set_vector_component(0, 1.0 + (i as f64) * 0.01);
        y.set_vector_component(1, 1.0 + (i as f64) * 0.02);
        z.set_vector_component(2, 1.0 + (i as f64) * 0.03);

        x_batch.push(x);
        y_batch.push(y);
        z_batch.push(z);
    }

    (x_batch, y_batch, z_batch)
}

// Additional placeholder implementations would go here...
// (EdgeMemoryOptimizer, AdaptiveQualityManager, etc.)