rustorch 0.6.29

//! Platform-specific optimizations
//! プラットフォーム特化最適化

use crate::error::{RusTorchError, RusTorchResult};
use std::env;

/// Platform optimization levels
/// プラットフォーム最適化レベル
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OptimizationLevel {
    /// No optimization (O0)
    None,
    /// Basic optimization (O1)
    Basic,
    /// Standard optimization (O2)
    Standard,
    /// Aggressive optimization (O3)
    Aggressive,
}

/// Platform-specific features
/// プラットフォーム特化機能
#[derive(Debug, Clone)]
pub struct PlatformFeatures {
    /// Operating system
    pub os: String,
    /// Architecture
    pub arch: String,
    /// Number of CPU cores
    pub cpu_cores: usize,
    /// Total system memory (bytes)
    pub total_memory: usize,
    /// Cache line size
    pub cache_line_size: usize,
    /// Page size
    pub page_size: usize,
    /// Supports huge pages
    pub supports_huge_pages: bool,
    /// Supports prefetching
    pub supports_prefetch: bool,
}

/// Platform-specific optimizer
/// プラットフォーム特化最適化器
pub struct PlatformOptimizer {
    features: PlatformFeatures,
    optimization_level: OptimizationLevel,
    thread_pool_size: usize,
}

impl PlatformOptimizer {
    /// Create new platform optimizer
    /// 新規プラットフォーム最適化器作成
    pub fn new() -> Self {
        let features = Self::detect_features();
        let optimization_level = OptimizationLevel::Standard;
        let thread_pool_size = Self::calculate_optimal_threads(&features);

        PlatformOptimizer {
            features,
            optimization_level,
            thread_pool_size,
        }
    }

    /// Detect platform features
    /// プラットフォーム機能検出
    fn detect_features() -> PlatformFeatures {
        PlatformFeatures {
            os: env::consts::OS.to_string(),
            arch: env::consts::ARCH.to_string(),
            cpu_cores: num_cpus::get(),
            total_memory: Self::get_total_memory(),
            cache_line_size: Self::detect_cache_line_size(),
            page_size: Self::get_page_size(),
            supports_huge_pages: Self::check_huge_pages_support(),
            supports_prefetch: Self::check_prefetch_support(),
        }
    }

    /// Get total system memory
    /// 総システムメモリ取得
    fn get_total_memory() -> usize {
        #[cfg(target_os = "linux")]
        {
            // Linux memory detection would require sys-info crate
            8 * 1024 * 1024 * 1024
        }
        #[cfg(target_os = "macos")]
        {
            unsafe {
                let mut size: usize = std::mem::size_of::<i64>();
                let mut total_mem: i64 = 0;
                let mut mib = [6i32, 0i32]; // CTL_HW, HW_MEMSIZE

                libc::sysctl(
                    mib.as_mut_ptr(),
                    2,
                    &mut total_mem as *mut _ as *mut _,
                    &mut size,
                    std::ptr::null_mut(),
                    0,
                );

                if total_mem > 0 {
                    total_mem as usize
                } else {
                    8_usize
                        .saturating_mul(1024)
                        .saturating_mul(1024)
                        .saturating_mul(1024)
                }
            }
        }
        #[cfg(not(any(target_os = "linux", target_os = "macos")))]
        {
            // Default fallback: 8GB for other platforms
            8_usize
                .saturating_mul(1024)
                .saturating_mul(1024)
                .saturating_mul(1024)
        }
    }

    /// Detect cache line size
    /// キャッシュラインサイズ検出
    fn detect_cache_line_size() -> usize {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Most x86/x86_64 processors have 64-byte cache lines
            64
        }

        #[cfg(target_arch = "aarch64")]
        {
            // Most ARM64 processors have 64 or 128-byte cache lines
            64
        }

        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
        {
            // Conservative default
            64
        }
    }

    /// Get system page size
    /// システムページサイズ取得
    fn get_page_size() -> usize {
        #[cfg(unix)]
        {
            unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize }
        }

        #[cfg(target_os = "windows")]
        {
            // Windows support would require winapi crate
            4096
        }

        #[cfg(not(any(unix, windows)))]
        {
            // Default page size
            4096
        }
    }

    /// Check huge pages support
    /// Huge Pagesサポート確認
    fn check_huge_pages_support() -> bool {
        #[cfg(target_os = "linux")]
        {
            std::path::Path::new("/sys/kernel/mm/transparent_hugepage/enabled").exists()
        }

        #[cfg(not(target_os = "linux"))]
        {
            false
        }
    }

    /// Check prefetch support
    /// プリフェッチサポート確認
    fn check_prefetch_support() -> bool {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            true // x86/x86_64 generally supports prefetch
        }

        #[cfg(target_arch = "aarch64")]
        {
            true // ARM64 supports prefetch
        }

        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
        {
            false
        }
    }

    /// Calculate optimal thread pool size
    /// 最適スレッドプールサイズ計算
    fn calculate_optimal_threads(features: &PlatformFeatures) -> usize {
        // Use 75% of available cores for compute, leave some for system
        let compute_cores = (features.cpu_cores as f32 * 0.75) as usize;
        compute_cores.max(1)
    }

    /// Apply platform-specific memory alignment
    /// プラットフォーム特化メモリアライメント適用
    pub fn align_memory(&self, size: usize) -> usize {
        let alignment = self.features.cache_line_size;
        (size + alignment - 1) & !(alignment - 1)
    }

    /// Allocate aligned memory
    /// アライメント済みメモリ割り当て
    pub fn allocate_aligned<T>(&self, count: usize) -> RusTorchResult<Vec<T>>
    where
        T: Default + Clone,
    {
        let size = count * std::mem::size_of::<T>();
        let aligned_size = self.align_memory(size);
        let aligned_count = aligned_size / std::mem::size_of::<T>();

        Ok(vec![T::default(); aligned_count])
    }

    /// Apply prefetch hints for memory access
    /// メモリアクセスのプリフェッチヒント適用
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub unsafe fn prefetch_read<T>(ptr: *const T) {
        #[cfg(target_arch = "x86_64")]
        {
            use std::arch::x86_64::_mm_prefetch;
            _mm_prefetch(ptr as *const i8, 0); // _MM_HINT_T0
        }
    }

    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    pub unsafe fn prefetch_read<T>(_ptr: *const T) {
        // No-op on unsupported platforms
    }

    /// Set thread affinity for better cache locality
    /// キャッシュ局所性向上のためのスレッドアフィニティ設定
    #[cfg(target_os = "linux")]
    pub fn set_thread_affinity(&self, _thread_id: usize) -> RusTorchResult<()> {
        // Thread affinity setting would require more complex implementation
        Ok(())
    }

    #[cfg(not(target_os = "linux"))]
    pub fn set_thread_affinity(&self, _thread_id: usize) -> RusTorchResult<()> {
        // Not supported on other platforms
        Ok(())
    }

    /// Get platform features
    /// プラットフォーム機能取得
    pub fn features(&self) -> &PlatformFeatures {
        &self.features
    }

    /// Get optimization level
    /// 最適化レベル取得
    pub fn optimization_level(&self) -> OptimizationLevel {
        self.optimization_level
    }

    /// Set optimization level
    /// 最適化レベル設定
    pub fn set_optimization_level(&mut self, level: OptimizationLevel) {
        self.optimization_level = level;
        // Adjust thread pool size based on optimization level
        match level {
            OptimizationLevel::None => self.thread_pool_size = 1,
            OptimizationLevel::Basic => self.thread_pool_size = self.features.cpu_cores / 2,
            OptimizationLevel::Standard => {
                self.thread_pool_size = Self::calculate_optimal_threads(&self.features)
            }
            OptimizationLevel::Aggressive => self.thread_pool_size = self.features.cpu_cores,
        }
    }

    /// Get optimal thread pool size
    /// 最適スレッドプールサイズ取得
    pub fn thread_pool_size(&self) -> usize {
        self.thread_pool_size
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_platform_detection() {
        let optimizer = PlatformOptimizer::new();
        let features = optimizer.features();

        println!("Platform features:");
        println!("  OS: {}", features.os);
        println!("  Architecture: {}", features.arch);
        println!("  CPU cores: {}", features.cpu_cores);
        println!(
            "  Total memory: {} GB",
            features.total_memory / (1024 * 1024 * 1024)
        );
        println!("  Cache line size: {} bytes", features.cache_line_size);
        println!("  Page size: {} bytes", features.page_size);
        println!("  Huge pages: {}", features.supports_huge_pages);
        println!("  Prefetch: {}", features.supports_prefetch);

        assert!(features.cpu_cores > 0);
        assert!(features.cache_line_size > 0);
        assert!(features.page_size > 0);
    }

    #[test]
    fn test_memory_alignment() {
        let optimizer = PlatformOptimizer::new();

        let unaligned = 100;
        let aligned = optimizer.align_memory(unaligned);

        assert!(aligned >= unaligned);
        assert_eq!(aligned % optimizer.features().cache_line_size, 0);
    }

    #[test]
    fn test_optimization_levels() {
        let mut optimizer = PlatformOptimizer::new();

        optimizer.set_optimization_level(OptimizationLevel::None);
        assert_eq!(optimizer.thread_pool_size(), 1);

        optimizer.set_optimization_level(OptimizationLevel::Aggressive);
        assert_eq!(optimizer.thread_pool_size(), optimizer.features().cpu_cores);
    }
}