use scirs2_core::ndarray::{Array1, ArrayView1, ScalarOperand};
use scirs2_core::numeric::Float;
use std::fmt::Debug;
use std::marker::PhantomData;
use crate::error::Result;
use crate::optimizers::Optimizer;
/// User-facing settings for the GPU optimizer wrapper.
///
/// The values are stored and exposed through accessors; the code visible in
/// this file does not yet change its behaviour based on most of them.
#[derive(Debug, Clone)]
pub struct GpuConfig {
    /// Request tensor-core acceleration. Defaults to `true`.
    pub use_tensor_cores: bool,
    /// Request mixed-precision computation. Defaults to `false`.
    pub use_mixed_precision: bool,
    /// Name of the backend to prefer; `None` lets the wrapper fall back to
    /// its own default choice.
    pub preferred_backend: Option<String>,
    /// Optional upper bound on GPU memory usage (presumably in bytes —
    /// TODO confirm the unit against callers). `None` means no limit is set.
    pub max_gpu_memory: Option<usize>,
    /// Request memory-usage tracking. Defaults to `true`.
    pub track_memory: bool,
}

impl Default for GpuConfig {
    /// Builds the default configuration: tensor cores and memory tracking
    /// enabled, mixed precision disabled, no backend preference and no
    /// memory limit.
    fn default() -> Self {
        GpuConfig {
            // Optional settings start unset.
            preferred_backend: None,
            max_gpu_memory: None,
            // Boolean feature switches.
            use_mixed_precision: false,
            use_tensor_cores: true,
            track_memory: true,
        }
    }
}
/// Wrapper that pairs a base optimizer with GPU configuration and context state.
///
/// `O` is the wrapped optimizer operating on one-dimensional arrays and `A`
/// is the floating-point element type of those arrays.
pub struct GpuOptimizer<O, A>
where
O: Optimizer<A, scirs2_core::ndarray::Ix1>,
A: Float + ScalarOperand + Debug,
{
/// The wrapped optimizer; the `step` methods in this file forward to it.
base_optimizer: O,
/// Settings supplied at construction time and adjustable through the setters.
config: GpuConfig,
/// GPU context state; the constructor in this file always stores `Some`.
gpu_context: Option<GpuContextWrapper>,
/// Ties the element type `A` to the struct without storing a value of it.
_phantom: PhantomData<A>,
}
/// Internal record describing the GPU context held by `GpuOptimizer`.
struct GpuContextWrapper {
/// Whether the GPU path should be taken; `initialize_gpu` currently always
/// sets this to `true`.
available: bool,
/// Backend name: the configured preferred backend, or `"auto"` when none
/// was configured.
backend: String,
}
impl<O, A> GpuOptimizer<O, A>
where
    O: Optimizer<A, scirs2_core::ndarray::Ix1> + Clone,
    A: Float + ScalarOperand + Debug,
{
    /// Wraps `base_optimizer` with the given GPU `config`.
    ///
    /// # Errors
    ///
    /// Propagates any error from GPU initialization. The initialization in
    /// this file never fails.
    pub fn new(base_optimizer: O, config: GpuConfig) -> Result<Self> {
        let gpu_context = Self::initialize_gpu(&config)?;
        Ok(Self {
            base_optimizer,
            config,
            gpu_context: Some(gpu_context),
            _phantom: PhantomData,
        })
    }

    /// Wraps `base_optimizer` using [`GpuConfig::default`].
    ///
    /// # Errors
    ///
    /// Same as [`GpuOptimizer::new`].
    pub fn with_default_config(base_optimizer: O) -> Result<Self> {
        Self::new(base_optimizer, GpuConfig::default())
    }

    /// Builds the context record for the configured backend.
    ///
    /// Uses `config.preferred_backend` when set and `"auto"` otherwise. The
    /// context is always marked as available; no device probing happens here.
    fn initialize_gpu(config: &GpuConfig) -> Result<GpuContextWrapper> {
        let backend = config
            .preferred_backend
            .clone()
            .unwrap_or_else(|| "auto".to_string());
        Ok(GpuContextWrapper {
            available: true,
            backend,
        })
    }

    /// Performs one optimization step and returns the updated parameters.
    ///
    /// Takes the GPU path when the context reports itself available and
    /// otherwise calls the base optimizer directly. Both paths currently end
    /// in the base optimizer's `step`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the base optimizer.
    pub fn step(&mut self, params: &Array1<A>, gradients: &Array1<A>) -> Result<Array1<A>> {
        if self.is_gpu_available() {
            return self.step_gpu(params, gradients);
        }
        self.base_optimizer.step(params, gradients)
    }

    /// GPU step path. Placeholder: delegates to the base optimizer.
    fn step_gpu(&mut self, params: &Array1<A>, gradients: &Array1<A>) -> Result<Array1<A>> {
        self.base_optimizer.step(params, gradients)
    }

    /// Placeholder for uploading data to the GPU; does nothing and returns `Ok(())`.
    pub fn to_gpu(&self, _data: &ArrayView1<A>) -> Result<()> {
        Ok(())
    }

    /// Placeholder for downloading data from the GPU.
    ///
    /// # Errors
    ///
    /// Always returns `OptimError::InvalidConfig` with the message
    /// "GPU implementation not yet available".
    pub fn from_gpu(&self) -> Result<Array1<A>> {
        Err(crate::error::OptimError::InvalidConfig(
            "GPU implementation not yet available".to_string(),
        ))
    }

    /// Returns `true` when a GPU context exists and is marked available.
    pub fn is_gpu_available(&self) -> bool {
        self.gpu_context
            .as_ref()
            .map(|ctx| ctx.available)
            .unwrap_or(false)
    }

    /// Returns the backend name stored in the GPU context, if a context exists.
    pub fn gpu_backend(&self) -> Option<&str> {
        self.gpu_context.as_ref().map(|ctx| ctx.backend.as_str())
    }

    /// Returns the current configuration.
    pub fn config(&self) -> &GpuConfig {
        &self.config
    }

    /// Updates the `use_tensor_cores` flag in the stored configuration.
    pub fn set_use_tensor_cores(&mut self, enable: bool) {
        self.config.use_tensor_cores = enable;
    }

    /// Updates the `use_mixed_precision` flag in the stored configuration.
    pub fn set_use_mixed_precision(&mut self, enable: bool) {
        self.config.use_mixed_precision = enable;
    }

    /// Estimates memory needed as
    /// `num_params * dtype_size * (2 + optimizer_states)`.
    ///
    /// The constant `2` presumably accounts for the parameter and gradient
    /// buffers (TODO confirm). `dtype_size` is the per-element size, so the
    /// result is in the same unit (bytes when `dtype_size` is in bytes).
    ///
    /// The arithmetic saturates at `usize::MAX` instead of panicking in debug
    /// builds or silently wrapping in release builds when the product does
    /// not fit in `usize`.
    pub fn estimate_gpu_memory(
        num_params: usize,
        dtype_size: usize,
        optimizer_states: usize,
    ) -> usize {
        let buffers_per_param = optimizer_states.saturating_add(2);
        num_params
            .saturating_mul(dtype_size)
            .saturating_mul(buffers_per_param)
    }
}
/// Snapshot of GPU memory counters.
///
/// All fields share one unit (presumably bytes — TODO confirm with the
/// code that fills these in).
#[derive(Debug, Clone)]
pub struct GpuMemoryStats {
    /// Total memory.
    pub total: usize,
    /// Memory in use.
    pub used: usize,
    /// Remaining memory; computed by [`GpuMemoryStats::new`] as
    /// `total - used`, clamped at zero.
    pub free: usize,
    /// Memory attributed to the optimizer; starts at zero.
    pub optimizer_usage: usize,
}

impl GpuMemoryStats {
    /// Creates a snapshot from `total` and `used`.
    ///
    /// `free` is derived with a saturating subtraction, so `used > total`
    /// yields `free == 0` rather than an underflow. `optimizer_usage` is
    /// initialised to zero.
    pub fn new(total: usize, used: usize) -> Self {
        let free = total.saturating_sub(used);
        GpuMemoryStats {
            total,
            used,
            free,
            optimizer_usage: 0,
        }
    }

    /// Returns `used / total` as a percentage, or `0.0` when `total` is zero.
    pub fn utilization_percent(&self) -> f64 {
        match self.total {
            // Guard against division by zero for an empty snapshot.
            0 => 0.0,
            total => (self.used as f64 / total as f64) * 100.0,
        }
    }
}
/// Namespace for GPU helper functions.
///
/// Every function here is currently a stub returning a fixed value; none of
/// them queries real hardware.
pub struct GpuUtils;

impl GpuUtils {
    /// Lists backend names; currently the single entry `"auto"`.
    pub fn detect_backends() -> Vec<String> {
        let fallback = "auto".to_string();
        vec![fallback]
    }

    /// Reports tensor-core support; currently always `false`.
    pub fn has_tensor_cores() -> bool {
        false
    }

    /// Reports the number of GPU devices; currently always `0`.
    pub fn device_count() -> usize {
        0
    }

    /// Returns memory statistics for `device_id`.
    ///
    /// The identifier is ignored for now and an all-zero snapshot is returned.
    pub fn memory_stats(device_id: usize) -> Result<GpuMemoryStats> {
        // Explicitly discard the argument until real device queries exist.
        let _ = device_id;
        let empty_stats = GpuMemoryStats::new(0, 0);
        Ok(empty_stats)
    }

    /// Waits for outstanding GPU work; currently a no-op that returns `Ok(())`.
    pub fn synchronize() -> Result<()> {
        Ok(())
    }
}
/// Unit tests for the GPU configuration, optimizer wrapper, memory statistics
/// and utility stubs defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::optimizers::SGD;
    use scirs2_core::ndarray::Array1;

    #[test]
    fn test_gpu_config_default() {
        let config = GpuConfig::default();
        assert!(config.use_tensor_cores);
        assert!(!config.use_mixed_precision);
        assert!(config.track_memory);
    }

    #[test]
    fn test_gpu_optimizer_creation() {
        let optimizer = SGD::new(0.01);
        let config = GpuConfig::default();
        let gpu_opt = GpuOptimizer::new(optimizer, config);
        assert!(gpu_opt.is_ok());
    }

    #[test]
    fn test_gpu_optimizer_with_default_config() {
        let optimizer = SGD::new(0.01);
        let gpu_opt = GpuOptimizer::with_default_config(optimizer);
        assert!(gpu_opt.is_ok());
    }

    #[test]
    fn test_gpu_optimizer_step() {
        let optimizer = SGD::new(0.01);
        let mut gpu_opt = GpuOptimizer::with_default_config(optimizer).expect("unwrap failed");
        let params = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let grads = Array1::from_vec(vec![0.1, 0.2, 0.3]);
        // The original line held a mis-encoded `&params` token, which broke compilation.
        let result = gpu_opt.step(&params, &grads);
        assert!(result.is_ok());
    }

    #[test]
    fn test_gpu_availability() {
        let optimizer = SGD::new(0.01);
        let gpu_opt = GpuOptimizer::with_default_config(optimizer).expect("unwrap failed");
        assert!(gpu_opt.is_gpu_available());
    }

    #[test]
    fn test_gpu_backend() {
        let optimizer = SGD::new(0.01);
        let gpu_opt = GpuOptimizer::with_default_config(optimizer).expect("unwrap failed");
        let backend = gpu_opt.gpu_backend();
        assert!(backend.is_some());
    }

    #[test]
    fn test_gpu_config_mutations() {
        let optimizer = SGD::new(0.01);
        let mut gpu_opt = GpuOptimizer::with_default_config(optimizer).expect("unwrap failed");
        gpu_opt.set_use_tensor_cores(false);
        assert!(!gpu_opt.config().use_tensor_cores);
        gpu_opt.set_use_mixed_precision(true);
        assert!(gpu_opt.config().use_mixed_precision);
    }

    #[test]
    fn test_estimate_gpu_memory() {
        // 1M elements * 4 per element * (2 + 1 state buffers).
        let mem = GpuOptimizer::<SGD<f32>, f32>::estimate_gpu_memory(1_000_000, 4, 1);
        assert_eq!(mem, 12_000_000);
        // 1M elements * 4 per element * (2 + 2 state buffers).
        let mem = GpuOptimizer::<SGD<f32>, f32>::estimate_gpu_memory(1_000_000, 4, 2);
        assert_eq!(mem, 16_000_000);
    }

    #[test]
    fn test_gpu_memory_stats() {
        let stats = GpuMemoryStats::new(1_000_000_000, 500_000_000);
        assert_eq!(stats.total, 1_000_000_000);
        assert_eq!(stats.used, 500_000_000);
        assert_eq!(stats.free, 500_000_000);
        assert_eq!(stats.utilization_percent(), 50.0);
    }

    #[test]
    fn test_gpu_utils_detect_backends() {
        let backends = GpuUtils::detect_backends();
        assert!(!backends.is_empty());
    }

    #[test]
    fn test_gpu_utils_synchronize() {
        let result = GpuUtils::synchronize();
        assert!(result.is_ok());
    }
}