// optirs_gpu/lib.rs
1//! # OptiRS GPU - GPU Acceleration for ML Optimization
2//!
3//! **Version:** 0.1.0-rc.2
4//! **Status:** Framework Ready (GPU Kernels Coming Soon)
5//!
6//! `optirs-gpu` provides GPU acceleration for OptiRS optimizers, built on
7//! [SciRS2](https://github.com/cool-japan/scirs)'s GPU abstractions.
8//!
9//! ## Dependencies
10//!
11//! - `scirs2-core` 0.1.0-rc.4 - Required foundation
12//!
13//! ## Implementation Status (RC.2)
14//!
15//! - ✅ GPU context management
16//! - ✅ Multi-backend support framework (CUDA, Metal, OpenCL, WebGPU)
17//! - ✅ Memory transfer utilities
18//! - ✅ Configuration and initialization
19//! - 🚧 GPU kernels (in development)
20//! - 🚧 Tensor cores support (in development)
21//! - 📝 Multi-GPU coordination (planned)
22//!
23//! ## Status: Coming Soon
24//!
25//! This crate is under active development. GPU acceleration will leverage:
26//! - `scirs2_core::gpu` for GPU context and memory management
27//! - `scirs2_core::tensor_cores` for mixed-precision training
28//! - `scirs2_core::array_protocol::GPUArray` for zero-copy operations
29//!
30//! ## Planned Features
31//!
32//! ### Multi-Backend Support
33//! - **CUDA** - NVIDIA GPUs with full tensor core support
34//! - **Metal** - Apple Silicon M1/M2/M3 with unified memory
35//! - **OpenCL** - Cross-platform GPU compute
36//! - **WebGPU** - Browser and cross-platform support
37//!
38//! ### Performance Optimizations
39//! - **Tensor Cores** - FP16/BF16 mixed-precision training
40//! - **Memory Pools** - Advanced GPU memory management
41//! - **Kernel Fusion** - Optimized kernel execution
42//! - **Multi-GPU** - Distributed optimization across GPUs
43//!
44//! ### Expected Speedup
45//! - **10-50x** for large models (1M+ parameters)
46//! - **100x+** for very large models (100M+ parameters)
47//! - **Near-linear scaling** with multiple GPUs
48//!
49//! ## Example Usage (Future)
50//!
51//! ```rust,ignore
52//! use optirs_gpu::GpuOptimizer;
53//! use optirs::prelude::*;
54//! use scirs2_core::ndarray::Array1;
55//!
56//! // Create GPU-accelerated optimizer
57//! let optimizer = Adam::new(0.001);
58//! let mut gpu_opt = GpuOptimizer::new(optimizer)?;
59//!
60//! // Use like any optimizer - GPU acceleration is automatic
61//! let params = Array1::from_elem(1_000_000, 1.0);
62//! let grads = Array1::from_elem(1_000_000, 0.01);
//! let updated = gpu_opt.step(&params, &grads)?;
64//! ```
65//!
66//! ## Architecture
67//!
68//! Built exclusively on SciRS2:
69//! - **GPU Context**: `scirs2_core::gpu::GpuContext`
70//! - **GPU Memory**: `scirs2_core::gpu::GpuBuffer`
71//! - **GPU Kernels**: `scirs2_core::gpu::GpuKernel`
72//! - **Tensor Cores**: `scirs2_core::tensor_cores`
73//! - **Zero-Copy**: `scirs2_core::array_protocol::GPUArray`
74//!
75//! ## Contributing
76//!
77//! GPU acceleration development follows SciRS2 integration guidelines.
78//! All GPU operations must use `scirs2_core::gpu` abstractions.
79
80use scirs2_core::gpu::GpuError;
81use scirs2_core::ndarray::{Array, Dimension};
82use scirs2_core::numeric::Float;
83
84pub mod backends;
85pub mod kernels;
86pub mod memory;
87pub mod multi_gpu;
88pub mod tensor_cores;
89pub mod utils;
90
91pub use backends::GpuBackend;
92pub use memory::MemoryPool;
93
94/// Error type for GPU optimizer operations
95#[derive(Debug, thiserror::Error)]
96pub enum GpuOptimError {
97 /// GPU backend error
98 #[error("GPU error: {0}")]
99 GpuError(#[from] GpuError),
100
101 /// Unsupported operation
102 #[error("Operation not supported: {0}")]
103 UnsupportedOperation(String),
104
105 /// Invalid state
106 #[error("Invalid optimizer state: {0}")]
107 InvalidState(String),
108
109 /// Dimension mismatch
110 #[error("Dimension mismatch: expected {expected:?}, got {actual:?}")]
111 DimensionMismatch {
112 expected: Vec<usize>,
113 actual: Vec<usize>,
114 },
115
116 /// Not initialized
117 #[error("GPU optimizer not initialized")]
118 NotInitialized,
119
120 /// CUDA not available
121 #[error("CUDA is not available on this system")]
122 CudaNotAvailable,
123}
124
125/// Trait for GPU-accelerated optimizers
126pub trait GpuOptimizer<A: Float, D: Dimension> {
127 /// Check if GPU acceleration is available
128 fn is_gpu_available(&self) -> bool;
129
130 /// Move optimizer state to GPU
131 fn to_gpu(&mut self) -> Result<(), GpuOptimError>;
132
133 /// Move optimizer state back to CPU
134 fn to_cpu(&mut self) -> Result<(), GpuOptimError>;
135
136 /// Perform optimization step on GPU
137 fn step_gpu(
138 &mut self,
139 params: &mut Array<A, D>,
140 gradients: &Array<A, D>,
141 ) -> Result<(), GpuOptimError>;
142}