pub struct TensorCoreOptimizer { /* private fields */ }
Expand description
Tensor core enhanced optimizer
Implementations§
Source§impl TensorCoreOptimizer
impl TensorCoreOptimizer
Source§pub fn new(config: TensorCoreConfig) -> Result<Self, GpuOptimError>
pub fn new(config: TensorCoreConfig) -> Result<Self, GpuOptimError>
Create new tensor core optimizer
Source§pub fn optimize_layout(&mut self, m: usize, n: usize, k: usize) -> OptimalLayout
pub fn optimize_layout(&mut self, m: usize, n: usize, k: usize) -> OptimalLayout
Optimize matrix layout for tensor core operations
Source§pub fn tensor_core_gemm<T: Float + Debug + Send + Sync + 'static>(
&self,
a: &Array2<T>,
b: &Array2<T>,
c: &mut Array2<T>,
alpha: T,
beta: T,
precision: TensorCorePrecision,
) -> Result<(), GpuOptimError>
pub fn tensor_core_gemm<T: Float + Debug + Send + Sync + 'static>( &self, a: &Array2<T>, b: &Array2<T>, c: &mut Array2<T>, alpha: T, beta: T, precision: TensorCorePrecision, ) -> Result<(), GpuOptimError>
Perform tensor core optimized matrix multiplication
Source§pub fn fused_adam_tensor_core<T: Float + Debug + Send + Sync + 'static>(
&self,
params: &mut Array2<T>,
grads: &Array2<T>,
exp_avg: &mut Array2<T>,
exp_avg_sq: &mut Array2<T>,
adam_params: &AdamParams<T>,
) -> Result<(), GpuOptimError>
pub fn fused_adam_tensor_core<T: Float + Debug + Send + Sync + 'static>( &self, params: &mut Array2<T>, grads: &Array2<T>, exp_avg: &mut Array2<T>, exp_avg_sq: &mut Array2<T>, adam_params: &AdamParams<T>, ) -> Result<(), GpuOptimError>
Fused Adam update with tensor core optimization
Source§pub fn get_tensor_core_info(&self) -> TensorCoreInfo
pub fn get_tensor_core_info(&self) -> TensorCoreInfo
Get tensor core capability information
Source§pub fn create_mixed_precision_trainer(
&self,
) -> Result<MixedPrecisionTrainer, GpuOptimError>
pub fn create_mixed_precision_trainer( &self, ) -> Result<MixedPrecisionTrainer, GpuOptimError>
Automatic mixed precision trainer for optimizers
Source§pub fn sparse_tensor_core_gemm<T: Float + Debug + Send + Sync + 'static>(
&self,
a: &Array2<T>,
b_sparse: &SparseTensorCoreMatrix<T>,
c: &mut Array2<T>,
alpha: T,
beta: T,
) -> Result<(), GpuOptimError>
pub fn sparse_tensor_core_gemm<T: Float + Debug + Send + Sync + 'static>( &self, a: &Array2<T>, b_sparse: &SparseTensorCoreMatrix<T>, c: &mut Array2<T>, alpha: T, beta: T, ) -> Result<(), GpuOptimError>
Sparse tensor core optimization for 2:4 structured sparsity
Source§pub fn multi_batch_tensor_core_ops<T: Float + Debug + Send + Sync + 'static>(
&self,
batches: &[TensorCoreBatch<T>],
precision: TensorCorePrecision,
) -> Result<Vec<Array2<T>>, GpuOptimError>
pub fn multi_batch_tensor_core_ops<T: Float + Debug + Send + Sync + 'static>( &self, batches: &[TensorCoreBatch<T>], precision: TensorCorePrecision, ) -> Result<Vec<Array2<T>>, GpuOptimError>
Multi-batch tensor core operations for large-scale training
Source§pub fn optimized_pipeline_gemm<T: Float + Debug + Send + Sync + 'static>(
&self,
operations: &[TensorCoreOperation<T>],
pipeline_config: PipelineOptimizationConfig,
) -> Result<Vec<Array2<T>>, GpuOptimError>
pub fn optimized_pipeline_gemm<T: Float + Debug + Send + Sync + 'static>( &self, operations: &[TensorCoreOperation<T>], pipeline_config: PipelineOptimizationConfig, ) -> Result<Vec<Array2<T>>, GpuOptimError>
Advanced pipeline optimization for tensor core operations
Source§pub fn optimize_memory_access_patterns<T: Float + Debug + Send + Sync + 'static>(
&mut self,
matrices: &[Array2<T>],
) -> Result<Vec<OptimizedMatrix<T>>, GpuOptimError>
pub fn optimize_memory_access_patterns<T: Float + Debug + Send + Sync + 'static>( &mut self, matrices: &[Array2<T>], ) -> Result<Vec<OptimizedMatrix<T>>, GpuOptimError>
Dynamic memory coalescing optimization
Source§pub fn adaptive_tensor_core_scheduling<T: Float + Debug + Send + Sync + 'static>(
&mut self,
workload: &TensorCoreWorkload<T>,
) -> Result<SchedulingPlan, GpuOptimError>
pub fn adaptive_tensor_core_scheduling<T: Float + Debug + Send + Sync + 'static>( &mut self, workload: &TensorCoreWorkload<T>, ) -> Result<SchedulingPlan, GpuOptimError>
Adaptive tensor core scheduling based on hardware utilization
Source§pub fn benchmark_tensor_core_performance(
&self,
) -> Result<TensorCorePerformanceBenchmark, GpuOptimError>
pub fn benchmark_tensor_core_performance( &self, ) -> Result<TensorCorePerformanceBenchmark, GpuOptimError>
Benchmark tensor core performance for different configurations
Auto Trait Implementations§
impl Freeze for TensorCoreOptimizer
impl !RefUnwindSafe for TensorCoreOptimizer
impl !Send for TensorCoreOptimizer
impl !Sync for TensorCoreOptimizer
impl Unpin for TensorCoreOptimizer
impl !UnwindSafe for TensorCoreOptimizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more