ruvllm 2.2.1 - Docs.rs

#![allow(
    clippy::all,
    unused_imports,
    unused_variables,
    dead_code,
    unused_mut,
    unused_assignments,
    non_camel_case_types,
    clippy::approx_constant,
    unexpected_cfgs,
    unused_must_use,
    unused_parens
)]
//! Integration tests for v2.1 cross-platform features
//!
//! Tests cover:
//! - Platform-specific fallbacks
//! - WASM-specific detection and limitations
//! - Feature detection across platforms
//! - Graceful degradation
//! - Runtime capability checking

#![allow(non_camel_case_types)]

// =============================================================================
// Platform Types
// =============================================================================

/// Target platform
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Platform {
    MacOS,
    Linux,
    Windows,
    iOS,
    Android,
    WebAssembly,
    Unknown,
}

impl Platform {
    /// Detect current platform at compile time
    pub fn current() -> Self {
        #[cfg(target_os = "macos")]
        return Platform::MacOS;

        #[cfg(target_os = "linux")]
        return Platform::Linux;

        #[cfg(target_os = "windows")]
        return Platform::Windows;

        #[cfg(target_os = "ios")]
        return Platform::iOS;

        #[cfg(target_os = "android")]
        return Platform::Android;

        #[cfg(target_arch = "wasm32")]
        return Platform::WebAssembly;

        #[cfg(not(any(
            target_os = "macos",
            target_os = "linux",
            target_os = "windows",
            target_os = "ios",
            target_os = "android",
            target_arch = "wasm32"
        )))]
        return Platform::Unknown;
    }

    /// Check if platform supports Metal
    pub fn supports_metal(&self) -> bool {
        matches!(self, Platform::MacOS | Platform::iOS)
    }

    /// Check if platform supports CUDA
    pub fn supports_cuda(&self) -> bool {
        matches!(self, Platform::Linux | Platform::Windows)
    }

    /// Check if platform supports WebGPU
    pub fn supports_webgpu(&self) -> bool {
        matches!(
            self,
            Platform::MacOS | Platform::Linux | Platform::Windows | Platform::WebAssembly
        )
    }

    /// Check if platform supports native file I/O
    pub fn supports_native_io(&self) -> bool {
        !matches!(self, Platform::WebAssembly)
    }

    /// Check if platform supports multi-threading
    pub fn supports_threading(&self) -> bool {
        !matches!(self, Platform::WebAssembly)
    }

    /// Get maximum recommended batch size for platform
    pub fn max_recommended_batch_size(&self) -> usize {
        match self {
            Platform::MacOS | Platform::Linux | Platform::Windows => 64,
            Platform::iOS | Platform::Android => 16,
            Platform::WebAssembly => 4,
            Platform::Unknown => 1,
        }
    }
}

/// CPU architecture
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Architecture {
    X86_64,
    Aarch64,
    Wasm32,
    Unknown,
}

impl Architecture {
    /// Detect current architecture at compile time
    pub fn current() -> Self {
        #[cfg(target_arch = "x86_64")]
        return Architecture::X86_64;

        #[cfg(target_arch = "aarch64")]
        return Architecture::Aarch64;

        #[cfg(target_arch = "wasm32")]
        return Architecture::Wasm32;

        #[cfg(not(any(
            target_arch = "x86_64",
            target_arch = "aarch64",
            target_arch = "wasm32"
        )))]
        return Architecture::Unknown;
    }

    /// Check if architecture supports SIMD
    pub fn supports_simd(&self) -> bool {
        !matches!(self, Architecture::Unknown)
    }

    /// Get SIMD width in bytes
    pub fn simd_width(&self) -> usize {
        match self {
            Architecture::X86_64 => 32,  // AVX2
            Architecture::Aarch64 => 16, // NEON
            Architecture::Wasm32 => 16,  // SIMD128
            Architecture::Unknown => 0,
        }
    }
}

// =============================================================================
// CPU Features
// =============================================================================

/// CPU feature flags
#[derive(Debug, Clone, Default)]
pub struct CpuFeatures {
    // x86_64 features
    pub sse: bool,
    pub sse2: bool,
    pub sse3: bool,
    pub ssse3: bool,
    pub sse4_1: bool,
    pub sse4_2: bool,
    pub avx: bool,
    pub avx2: bool,
    pub avx512f: bool,
    pub avx512vl: bool,
    pub avx512vnni: bool,
    pub fma: bool,
    pub f16c: bool,

    // ARM features
    pub neon: bool,
    pub fp16: bool,
    pub dotprod: bool,
    pub i8mm: bool,
    pub sve: bool,
    pub sve2: bool,

    // WASM features
    pub simd128: bool,
    pub relaxed_simd: bool,
}

impl CpuFeatures {
    /// Detect CPU features at runtime
    pub fn detect() -> Self {
        let mut features = Self::default();

        #[cfg(target_arch = "x86_64")]
        {
            #[cfg(target_feature = "sse")]
            {
                features.sse = true;
            }
            #[cfg(target_feature = "sse2")]
            {
                features.sse2 = true;
            }
            #[cfg(target_feature = "sse3")]
            {
                features.sse3 = true;
            }
            #[cfg(target_feature = "ssse3")]
            {
                features.ssse3 = true;
            }
            #[cfg(target_feature = "sse4.1")]
            {
                features.sse4_1 = true;
            }
            #[cfg(target_feature = "sse4.2")]
            {
                features.sse4_2 = true;
            }
            #[cfg(target_feature = "avx")]
            {
                features.avx = true;
            }
            #[cfg(target_feature = "avx2")]
            {
                features.avx2 = true;
            }
            #[cfg(target_feature = "fma")]
            {
                features.fma = true;
            }
            #[cfg(target_feature = "f16c")]
            {
                features.f16c = true;
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            // NEON is always available on aarch64
            features.neon = true;

            #[cfg(target_feature = "fp16")]
            {
                features.fp16 = true;
            }
            #[cfg(target_feature = "dotprod")]
            {
                features.dotprod = true;
            }
        }

        #[cfg(target_arch = "wasm32")]
        {
            #[cfg(target_feature = "simd128")]
            {
                features.simd128 = true;
            }
            #[cfg(target_feature = "relaxed-simd")]
            {
                features.relaxed_simd = true;
            }
        }

        features
    }

    /// Create feature set for a mock x86_64 system with AVX2
    pub fn mock_x86_64_avx2() -> Self {
        Self {
            sse: true,
            sse2: true,
            sse3: true,
            ssse3: true,
            sse4_1: true,
            sse4_2: true,
            avx: true,
            avx2: true,
            fma: true,
            f16c: true,
            ..Default::default()
        }
    }

    /// Create feature set for a mock ARM system with NEON
    pub fn mock_aarch64_neon() -> Self {
        Self {
            neon: true,
            fp16: true,
            dotprod: true,
            ..Default::default()
        }
    }

    /// Create feature set for a mock WASM environment
    pub fn mock_wasm_simd() -> Self {
        Self {
            simd128: true,
            ..Default::default()
        }
    }

    /// Check if the system supports fast matrix operations
    pub fn supports_fast_matmul(&self) -> bool {
        self.avx2 || self.neon || self.simd128
    }

    /// Check if the system supports native FP16
    pub fn supports_native_fp16(&self) -> bool {
        self.f16c || self.fp16
    }

    /// Check if the system supports INT8 dot products
    pub fn supports_int8_dotprod(&self) -> bool {
        self.avx512vnni || self.dotprod || self.i8mm
    }
}

// =============================================================================
// GPU Capabilities
// =============================================================================

/// GPU backend type
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuBackend {
    Metal,
    Cuda,
    Vulkan,
    WebGPU,
    None,
}

/// GPU capabilities
#[derive(Debug, Clone)]
pub struct GpuCapabilities {
    pub backend: GpuBackend,
    pub device_name: String,
    pub compute_units: u32,
    pub memory_bytes: u64,
    pub supports_fp16: bool,
    pub supports_int8: bool,
    pub supports_bf16: bool,
    pub max_buffer_size: u64,
    pub max_workgroup_size: u32,
    pub unified_memory: bool,
}

impl GpuCapabilities {
    /// Create mock Metal capabilities (Apple Silicon)
    pub fn mock_metal_m4() -> Self {
        Self {
            backend: GpuBackend::Metal,
            device_name: "Apple M4 Pro".to_string(),
            compute_units: 20,
            memory_bytes: 48 * 1024 * 1024 * 1024, // 48GB unified
            supports_fp16: true,
            supports_int8: true,
            supports_bf16: true,
            max_buffer_size: 48 * 1024 * 1024 * 1024,
            max_workgroup_size: 1024,
            unified_memory: true,
        }
    }

    /// Create mock CUDA capabilities
    pub fn mock_cuda_4090() -> Self {
        Self {
            backend: GpuBackend::Cuda,
            device_name: "NVIDIA GeForce RTX 4090".to_string(),
            compute_units: 128,
            memory_bytes: 24 * 1024 * 1024 * 1024, // 24GB VRAM
            supports_fp16: true,
            supports_int8: true,
            supports_bf16: true,
            max_buffer_size: 24 * 1024 * 1024 * 1024,
            max_workgroup_size: 1024,
            unified_memory: false,
        }
    }

    /// Create mock WebGPU capabilities
    pub fn mock_webgpu() -> Self {
        Self {
            backend: GpuBackend::WebGPU,
            device_name: "WebGPU Device".to_string(),
            compute_units: 8,
            memory_bytes: 4 * 1024 * 1024 * 1024, // 4GB typical
            supports_fp16: true,
            supports_int8: false,
            supports_bf16: false,
            max_buffer_size: 2 * 1024 * 1024 * 1024, // 2GB buffer limit
            max_workgroup_size: 256,
            unified_memory: false,
        }
    }

    /// Create capabilities when no GPU is available
    pub fn none() -> Self {
        Self {
            backend: GpuBackend::None,
            device_name: "CPU Only".to_string(),
            compute_units: 0,
            memory_bytes: 0,
            supports_fp16: false,
            supports_int8: false,
            supports_bf16: false,
            max_buffer_size: 0,
            max_workgroup_size: 0,
            unified_memory: false,
        }
    }

    /// Check if GPU is available
    pub fn is_available(&self) -> bool {
        self.backend != GpuBackend::None
    }

    /// Calculate maximum model size that fits in memory
    pub fn max_model_size(&self) -> u64 {
        if self.unified_memory {
            self.memory_bytes * 9 / 10 // 90% of unified memory
        } else {
            self.memory_bytes * 8 / 10 // 80% of VRAM
        }
    }
}

// =============================================================================
// System Capabilities
// =============================================================================

/// Complete system capabilities
#[derive(Debug, Clone)]
pub struct SystemCapabilities {
    pub platform: Platform,
    pub architecture: Architecture,
    pub cpu_features: CpuFeatures,
    pub gpu: GpuCapabilities,
    pub system_memory_bytes: u64,
    pub cpu_cores: usize,
}

impl SystemCapabilities {
    /// Detect system capabilities
    pub fn detect() -> Self {
        Self {
            platform: Platform::current(),
            architecture: Architecture::current(),
            cpu_features: CpuFeatures::detect(),
            gpu: GpuCapabilities::none(), // Would need async detection
            system_memory_bytes: 0,       // Would need system calls
            cpu_cores: 1,                 // Would need system calls
        }
    }

    /// Create mock capabilities for Apple Silicon Mac
    pub fn mock_mac_m4() -> Self {
        Self {
            platform: Platform::MacOS,
            architecture: Architecture::Aarch64,
            cpu_features: CpuFeatures::mock_aarch64_neon(),
            gpu: GpuCapabilities::mock_metal_m4(),
            system_memory_bytes: 48 * 1024 * 1024 * 1024,
            cpu_cores: 14,
        }
    }

    /// Create mock capabilities for Linux with CUDA
    pub fn mock_linux_cuda() -> Self {
        Self {
            platform: Platform::Linux,
            architecture: Architecture::X86_64,
            cpu_features: CpuFeatures::mock_x86_64_avx2(),
            gpu: GpuCapabilities::mock_cuda_4090(),
            system_memory_bytes: 64 * 1024 * 1024 * 1024,
            cpu_cores: 16,
        }
    }

    /// Create mock capabilities for WebAssembly
    pub fn mock_wasm() -> Self {
        Self {
            platform: Platform::WebAssembly,
            architecture: Architecture::Wasm32,
            cpu_features: CpuFeatures::mock_wasm_simd(),
            gpu: GpuCapabilities::mock_webgpu(),
            system_memory_bytes: 4 * 1024 * 1024 * 1024, // Limited in browser
            cpu_cores: 4,                                // Typical worker count
        }
    }

    /// Create mock capabilities for CPU-only system
    pub fn mock_cpu_only() -> Self {
        Self {
            platform: Platform::Linux,
            architecture: Architecture::X86_64,
            cpu_features: CpuFeatures::mock_x86_64_avx2(),
            gpu: GpuCapabilities::none(),
            system_memory_bytes: 32 * 1024 * 1024 * 1024,
            cpu_cores: 8,
        }
    }

    /// Get the best available compute backend
    pub fn best_backend(&self) -> ComputeBackend {
        if self.gpu.is_available() {
            match self.gpu.backend {
                GpuBackend::Metal => ComputeBackend::Metal,
                GpuBackend::Cuda => ComputeBackend::Cuda,
                GpuBackend::WebGPU => ComputeBackend::WebGPU,
                _ => ComputeBackend::Cpu,
            }
        } else {
            ComputeBackend::Cpu
        }
    }
}

/// Compute backend selection
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComputeBackend {
    Metal,
    Cuda,
    WebGPU,
    Cpu,
}

// =============================================================================
// Fallback System
// =============================================================================

/// Backend fallback chain
pub struct FallbackChain {
    backends: Vec<ComputeBackend>,
}

impl FallbackChain {
    /// Create a fallback chain for the given capabilities
    pub fn for_capabilities(caps: &SystemCapabilities) -> Self {
        let mut backends = Vec::new();

        // Add GPU backend if available
        if caps.gpu.is_available() {
            backends.push(caps.best_backend());
        }

        // Add CPU as final fallback
        if !backends.contains(&ComputeBackend::Cpu) {
            backends.push(ComputeBackend::Cpu);
        }

        Self { backends }
    }

    /// Get the primary backend
    pub fn primary(&self) -> ComputeBackend {
        self.backends
            .first()
            .copied()
            .unwrap_or(ComputeBackend::Cpu)
    }

    /// Get all backends in order
    pub fn all(&self) -> &[ComputeBackend] {
        &self.backends
    }

    /// Check if a backend is available
    pub fn has(&self, backend: ComputeBackend) -> bool {
        self.backends.contains(&backend)
    }

    /// Get fallback for a failed backend
    pub fn fallback_for(&self, failed: ComputeBackend) -> Option<ComputeBackend> {
        let pos = self.backends.iter().position(|&b| b == failed)?;
        self.backends.get(pos + 1).copied()
    }
}

// =============================================================================
// WASM-Specific Utilities
// =============================================================================

/// WASM-specific limitations and workarounds
pub struct WasmLimitations {
    /// Maximum memory in bytes (due to 32-bit address space)
    pub max_memory: u64,
    /// Whether SharedArrayBuffer is available (for threading)
    pub has_shared_memory: bool,
    /// Whether SIMD128 is available
    pub has_simd: bool,
    /// Whether atomics are available
    pub has_atomics: bool,
    /// Maximum single allocation size
    pub max_allocation: u64,
}

impl WasmLimitations {
    /// Create with typical browser limitations
    pub fn typical_browser() -> Self {
        Self {
            max_memory: 4 * 1024 * 1024 * 1024, // 4GB
            has_shared_memory: false,           // Requires COOP/COEP headers
            has_simd: true,
            has_atomics: false,
            max_allocation: 2 * 1024 * 1024 * 1024, // 2GB single alloc
        }
    }

    /// Create with enhanced browser limitations (with headers)
    pub fn enhanced_browser() -> Self {
        Self {
            max_memory: 4 * 1024 * 1024 * 1024,
            has_shared_memory: true,
            has_simd: true,
            has_atomics: true,
            max_allocation: 2 * 1024 * 1024 * 1024,
        }
    }

    /// Create for Node.js environment
    pub fn nodejs() -> Self {
        Self {
            max_memory: 4 * 1024 * 1024 * 1024,
            has_shared_memory: true,
            has_simd: true,
            has_atomics: true,
            max_allocation: 2 * 1024 * 1024 * 1024,
        }
    }

    /// Check if multi-threading is possible
    pub fn can_multithread(&self) -> bool {
        self.has_shared_memory && self.has_atomics
    }

    /// Get recommended thread count
    pub fn recommended_threads(&self) -> usize {
        if self.can_multithread() {
            4 // Typical worker count in browsers
        } else {
            1
        }
    }

    /// Calculate maximum model size given limitations
    pub fn max_model_size(&self) -> u64 {
        // Leave headroom for runtime and other allocations
        self.max_memory * 7 / 10 // 70% of max memory
    }
}

// =============================================================================
// Configuration Generator
// =============================================================================

/// Optimal configuration for a given system
#[derive(Debug, Clone)]
pub struct OptimalConfig {
    pub backend: ComputeBackend,
    pub batch_size: usize,
    pub context_length: usize,
    pub thread_count: usize,
    pub quantization: QuantizationType,
    pub use_flash_attention: bool,
    pub use_kv_cache: bool,
    pub memory_mapped_weights: bool,
}

/// Quantization type
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationType {
    F32,
    F16,
    BF16,
    Q8_0,
    Q4_0,
    Q4_K,
}

impl OptimalConfig {
    /// Generate optimal configuration for given capabilities
    pub fn for_capabilities(caps: &SystemCapabilities, model_size_bytes: u64) -> Self {
        let backend = caps.best_backend();

        // Determine quantization based on model size and memory
        let available_memory = if caps.gpu.is_available() {
            caps.gpu.max_model_size()
        } else {
            caps.system_memory_bytes * 7 / 10
        };

        let quantization = if model_size_bytes <= available_memory {
            if caps.cpu_features.supports_native_fp16() || caps.gpu.supports_fp16 {
                QuantizationType::F16
            } else {
                QuantizationType::F32
            }
        } else if model_size_bytes / 2 <= available_memory {
            QuantizationType::Q8_0
        } else {
            QuantizationType::Q4_K
        };

        // Determine batch size
        let batch_size = caps.platform.max_recommended_batch_size();

        // Context length based on memory
        let context_length = match backend {
            ComputeBackend::Metal => 8192,
            ComputeBackend::Cuda => 8192,
            ComputeBackend::WebGPU => 2048,
            ComputeBackend::Cpu => 4096,
        };

        // Thread count
        let thread_count = if caps.platform.supports_threading() {
            caps.cpu_cores.min(8)
        } else {
            1
        };

        // Flash attention availability
        let use_flash_attention = matches!(backend, ComputeBackend::Metal | ComputeBackend::Cuda);

        // Memory mapping (not available in WASM)
        let memory_mapped_weights = caps.platform.supports_native_io();

        Self {
            backend,
            batch_size,
            context_length,
            thread_count,
            quantization,
            use_flash_attention,
            use_kv_cache: true,
            memory_mapped_weights,
        }
    }

    /// Generate WASM-specific configuration
    pub fn for_wasm(limits: &WasmLimitations, model_size_bytes: u64) -> Self {
        let quantization = if model_size_bytes <= limits.max_model_size() {
            QuantizationType::F16
        } else if model_size_bytes / 2 <= limits.max_model_size() {
            QuantizationType::Q8_0
        } else {
            QuantizationType::Q4_K
        };

        Self {
            backend: ComputeBackend::WebGPU,
            batch_size: 4,
            context_length: 2048,
            thread_count: limits.recommended_threads(),
            quantization,
            use_flash_attention: false,
            use_kv_cache: true,
            memory_mapped_weights: false, // Not available in WASM
        }
    }
}

// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    // -------------------------------------------------------------------------
    // Platform Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_platform_detection() {
        let platform = Platform::current();
        // Just verify it returns something valid
        assert!(matches!(
            platform,
            Platform::MacOS
                | Platform::Linux
                | Platform::Windows
                | Platform::iOS
                | Platform::Android
                | Platform::WebAssembly
                | Platform::Unknown
        ));
    }

    #[test]
    fn test_platform_metal_support() {
        assert!(Platform::MacOS.supports_metal());
        assert!(Platform::iOS.supports_metal());
        assert!(!Platform::Linux.supports_metal());
        assert!(!Platform::Windows.supports_metal());
        assert!(!Platform::WebAssembly.supports_metal());
    }

    #[test]
    fn test_platform_cuda_support() {
        assert!(Platform::Linux.supports_cuda());
        assert!(Platform::Windows.supports_cuda());
        assert!(!Platform::MacOS.supports_cuda());
        assert!(!Platform::WebAssembly.supports_cuda());
    }

    #[test]
    fn test_platform_webgpu_support() {
        assert!(Platform::MacOS.supports_webgpu());
        assert!(Platform::Linux.supports_webgpu());
        assert!(Platform::Windows.supports_webgpu());
        assert!(Platform::WebAssembly.supports_webgpu());
        assert!(!Platform::iOS.supports_webgpu());
    }

    #[test]
    fn test_platform_native_io() {
        assert!(Platform::MacOS.supports_native_io());
        assert!(Platform::Linux.supports_native_io());
        assert!(!Platform::WebAssembly.supports_native_io());
    }

    #[test]
    fn test_platform_threading() {
        assert!(Platform::MacOS.supports_threading());
        assert!(Platform::Linux.supports_threading());
        assert!(!Platform::WebAssembly.supports_threading());
    }

    #[test]
    fn test_platform_batch_sizes() {
        assert!(Platform::MacOS.max_recommended_batch_size() >= 32);
        assert!(Platform::iOS.max_recommended_batch_size() <= 32);
        assert!(Platform::WebAssembly.max_recommended_batch_size() <= 8);
    }

    // -------------------------------------------------------------------------
    // Architecture Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_architecture_detection() {
        let arch = Architecture::current();
        assert!(matches!(
            arch,
            Architecture::X86_64
                | Architecture::Aarch64
                | Architecture::Wasm32
                | Architecture::Unknown
        ));
    }

    #[test]
    fn test_architecture_simd_support() {
        assert!(Architecture::X86_64.supports_simd());
        assert!(Architecture::Aarch64.supports_simd());
        assert!(Architecture::Wasm32.supports_simd());
        assert!(!Architecture::Unknown.supports_simd());
    }

    #[test]
    fn test_architecture_simd_width() {
        assert_eq!(Architecture::X86_64.simd_width(), 32); // AVX2
        assert_eq!(Architecture::Aarch64.simd_width(), 16); // NEON
        assert_eq!(Architecture::Wasm32.simd_width(), 16); // SIMD128
        assert_eq!(Architecture::Unknown.simd_width(), 0);
    }

    // -------------------------------------------------------------------------
    // CPU Features Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_cpu_features_x86_64_mock() {
        let features = CpuFeatures::mock_x86_64_avx2();
        assert!(features.sse);
        assert!(features.sse2);
        assert!(features.avx);
        assert!(features.avx2);
        assert!(features.fma);
    }

    #[test]
    fn test_cpu_features_aarch64_mock() {
        let features = CpuFeatures::mock_aarch64_neon();
        assert!(features.neon);
        assert!(features.fp16);
        assert!(features.dotprod);
    }

    #[test]
    fn test_cpu_features_wasm_mock() {
        let features = CpuFeatures::mock_wasm_simd();
        assert!(features.simd128);
        assert!(!features.avx2);
        assert!(!features.neon);
    }

    #[test]
    fn test_cpu_features_fast_matmul() {
        let x86 = CpuFeatures::mock_x86_64_avx2();
        assert!(x86.supports_fast_matmul());

        let arm = CpuFeatures::mock_aarch64_neon();
        assert!(arm.supports_fast_matmul());

        let wasm = CpuFeatures::mock_wasm_simd();
        assert!(wasm.supports_fast_matmul());

        let none = CpuFeatures::default();
        assert!(!none.supports_fast_matmul());
    }

    #[test]
    fn test_cpu_features_native_fp16() {
        let x86 = CpuFeatures::mock_x86_64_avx2();
        assert!(x86.supports_native_fp16()); // f16c

        let arm = CpuFeatures::mock_aarch64_neon();
        assert!(arm.supports_native_fp16()); // fp16

        let wasm = CpuFeatures::mock_wasm_simd();
        assert!(!wasm.supports_native_fp16());
    }

    // -------------------------------------------------------------------------
    // GPU Capabilities Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_gpu_metal_mock() {
        let gpu = GpuCapabilities::mock_metal_m4();
        assert_eq!(gpu.backend, GpuBackend::Metal);
        assert!(gpu.unified_memory);
        assert!(gpu.supports_fp16);
        assert!(gpu.supports_bf16);
    }

    #[test]
    fn test_gpu_cuda_mock() {
        let gpu = GpuCapabilities::mock_cuda_4090();
        assert_eq!(gpu.backend, GpuBackend::Cuda);
        assert!(!gpu.unified_memory);
        assert!(gpu.supports_fp16);
    }

    #[test]
    fn test_gpu_webgpu_mock() {
        let gpu = GpuCapabilities::mock_webgpu();
        assert_eq!(gpu.backend, GpuBackend::WebGPU);
        assert!(gpu.supports_fp16);
        assert!(!gpu.supports_int8); // Typically not supported
    }

    #[test]
    fn test_gpu_none() {
        let gpu = GpuCapabilities::none();
        assert_eq!(gpu.backend, GpuBackend::None);
        assert!(!gpu.is_available());
    }

    #[test]
    fn test_gpu_max_model_size() {
        let metal = GpuCapabilities::mock_metal_m4();
        let cuda = GpuCapabilities::mock_cuda_4090();

        // Unified memory allows larger models
        assert!(metal.max_model_size() > cuda.max_model_size());
    }

    // -------------------------------------------------------------------------
    // System Capabilities Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_system_capabilities_mac() {
        let caps = SystemCapabilities::mock_mac_m4();
        assert_eq!(caps.platform, Platform::MacOS);
        assert_eq!(caps.architecture, Architecture::Aarch64);
        assert_eq!(caps.best_backend(), ComputeBackend::Metal);
    }

    #[test]
    fn test_system_capabilities_linux_cuda() {
        let caps = SystemCapabilities::mock_linux_cuda();
        assert_eq!(caps.platform, Platform::Linux);
        assert_eq!(caps.architecture, Architecture::X86_64);
        assert_eq!(caps.best_backend(), ComputeBackend::Cuda);
    }

    #[test]
    fn test_system_capabilities_wasm() {
        let caps = SystemCapabilities::mock_wasm();
        assert_eq!(caps.platform, Platform::WebAssembly);
        assert_eq!(caps.architecture, Architecture::Wasm32);
        assert_eq!(caps.best_backend(), ComputeBackend::WebGPU);
    }

    #[test]
    fn test_system_capabilities_cpu_only() {
        let caps = SystemCapabilities::mock_cpu_only();
        assert_eq!(caps.best_backend(), ComputeBackend::Cpu);
    }

    // -------------------------------------------------------------------------
    // Fallback Chain Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_fallback_chain_metal() {
        let caps = SystemCapabilities::mock_mac_m4();
        let chain = FallbackChain::for_capabilities(&caps);

        assert_eq!(chain.primary(), ComputeBackend::Metal);
        assert!(chain.has(ComputeBackend::Cpu));
        assert_eq!(
            chain.fallback_for(ComputeBackend::Metal),
            Some(ComputeBackend::Cpu)
        );
    }

    #[test]
    fn test_fallback_chain_cpu_only() {
        let caps = SystemCapabilities::mock_cpu_only();
        let chain = FallbackChain::for_capabilities(&caps);

        assert_eq!(chain.primary(), ComputeBackend::Cpu);
        assert_eq!(chain.all().len(), 1);
        assert_eq!(chain.fallback_for(ComputeBackend::Cpu), None);
    }

    #[test]
    fn test_fallback_chain_order() {
        let caps = SystemCapabilities::mock_linux_cuda();
        let chain = FallbackChain::for_capabilities(&caps);

        let backends = chain.all();
        assert_eq!(backends[0], ComputeBackend::Cuda);
        assert_eq!(backends[1], ComputeBackend::Cpu);
    }

    // -------------------------------------------------------------------------
    // WASM Limitations Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_wasm_limitations_typical() {
        let limits = WasmLimitations::typical_browser();
        assert!(!limits.has_shared_memory);
        assert!(!limits.can_multithread());
        assert_eq!(limits.recommended_threads(), 1);
    }

    #[test]
    fn test_wasm_limitations_enhanced() {
        let limits = WasmLimitations::enhanced_browser();
        assert!(limits.has_shared_memory);
        assert!(limits.has_atomics);
        assert!(limits.can_multithread());
        assert!(limits.recommended_threads() > 1);
    }

    #[test]
    fn test_wasm_limitations_nodejs() {
        let limits = WasmLimitations::nodejs();
        assert!(limits.can_multithread());
        assert!(limits.has_simd);
    }

    #[test]
    fn test_wasm_max_model_size() {
        let limits = WasmLimitations::typical_browser();
        let max_size = limits.max_model_size();
        assert!(max_size < limits.max_memory);
        assert!(max_size > 0);
    }

    // -------------------------------------------------------------------------
    // Optimal Configuration Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_optimal_config_mac() {
        let caps = SystemCapabilities::mock_mac_m4();
        let model_size = 7 * 1024 * 1024 * 1024; // 7B model (~7GB)

        let config = OptimalConfig::for_capabilities(&caps, model_size);

        assert_eq!(config.backend, ComputeBackend::Metal);
        assert!(config.use_flash_attention);
        assert!(config.memory_mapped_weights);
        assert!(config.thread_count > 1);
    }

    #[test]
    fn test_optimal_config_cuda() {
        let caps = SystemCapabilities::mock_linux_cuda();
        let model_size = 13 * 1024 * 1024 * 1024; // 13B model

        let config = OptimalConfig::for_capabilities(&caps, model_size);

        assert_eq!(config.backend, ComputeBackend::Cuda);
        assert!(config.use_flash_attention);
    }

    #[test]
    fn test_optimal_config_quantization_fallback() {
        let caps = SystemCapabilities::mock_cpu_only();
        let model_size = 70 * 1024 * 1024 * 1024; // 70B model - too large

        let config = OptimalConfig::for_capabilities(&caps, model_size);

        // Should fall back to aggressive quantization
        assert!(matches!(
            config.quantization,
            QuantizationType::Q4_0 | QuantizationType::Q4_K | QuantizationType::Q8_0
        ));
    }

    #[test]
    fn test_optimal_config_wasm() {
        let limits = WasmLimitations::typical_browser();
        let model_size = 2 * 1024 * 1024 * 1024; // 2B model

        let config = OptimalConfig::for_wasm(&limits, model_size);

        assert_eq!(config.backend, ComputeBackend::WebGPU);
        assert!(!config.use_flash_attention);
        assert!(!config.memory_mapped_weights);
        assert!(config.context_length <= 4096);
        assert!(config.batch_size <= 8);
    }

    #[test]
    fn test_optimal_config_small_model() {
        let caps = SystemCapabilities::mock_mac_m4();
        let model_size = 1 * 1024 * 1024 * 1024; // 1GB model

        let config = OptimalConfig::for_capabilities(&caps, model_size);

        // Small model should use FP16, not quantized
        assert!(matches!(
            config.quantization,
            QuantizationType::F16 | QuantizationType::F32
        ));
    }

    // -------------------------------------------------------------------------
    // Integration Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_full_detection_pipeline() {
        // Test the full detection -> configuration pipeline
        let caps = SystemCapabilities::detect();

        // Should always return valid values
        assert!(caps.cpu_cores == 0 || caps.cpu_cores >= 1);

        let chain = FallbackChain::for_capabilities(&caps);
        assert!(!chain.all().is_empty());

        // Generate config for a 7B model
        let config = OptimalConfig::for_capabilities(&caps, 7 * 1024 * 1024 * 1024);
        assert!(config.batch_size >= 1);
        assert!(config.context_length >= 512);
    }

    #[test]
    fn test_platform_specific_defaults() {
        // Test that each platform gets sensible defaults
        let platforms = vec![
            SystemCapabilities::mock_mac_m4(),
            SystemCapabilities::mock_linux_cuda(),
            SystemCapabilities::mock_wasm(),
            SystemCapabilities::mock_cpu_only(),
        ];

        for caps in platforms {
            let config = OptimalConfig::for_capabilities(&caps, 4 * 1024 * 1024 * 1024);

            // Basic sanity checks
            assert!(config.batch_size >= 1);
            assert!(config.context_length >= 512);
            assert!(config.thread_count >= 1);
            assert!(config.use_kv_cache); // Always enabled
        }
    }

    #[test]
    fn test_graceful_degradation() {
        // Start with high-end system
        let mut caps = SystemCapabilities::mock_linux_cuda();

        // Remove GPU
        caps.gpu = GpuCapabilities::none();

        let config = OptimalConfig::for_capabilities(&caps, 7 * 1024 * 1024 * 1024);

        // Should fall back to CPU
        assert_eq!(config.backend, ComputeBackend::Cpu);
        assert!(!config.use_flash_attention); // Not available on CPU
    }

    #[test]
    fn test_memory_constrained_config() {
        // Very limited memory
        let mut caps = SystemCapabilities::mock_cpu_only();
        caps.system_memory_bytes = 8 * 1024 * 1024 * 1024; // 8GB only

        // Try to load a large model
        let model_size = 30 * 1024 * 1024 * 1024; // 30GB

        let config = OptimalConfig::for_capabilities(&caps, model_size);

        // Should use aggressive quantization
        assert!(matches!(
            config.quantization,
            QuantizationType::Q4_0 | QuantizationType::Q4_K
        ));
    }
}