Re-exports
pub use adagrad::AdaGrad;
pub use adagrad::AdaGradConfig;
pub use adamw::AdamW;
pub use adamw::AdamWConfig;
pub use grad_accumulator::GradAccumulator;
pub use grad_clip::clip_grad_norm;
pub use grad_clip::clip_grad_norm_per_param;
pub use grad_clip::clip_grad_value;
pub use grad_scaler::GradScaler;
pub use lamb::Lamb;
pub use lamb::LambConfig;
pub use lr_schedule::DecayShape;
pub use lr_schedule::LrSchedule;
pub use sgd::Sgd;
pub use sgd::SgdConfig;
pub use traits::Optimizer;
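These root-level re-exports let downstream code import each item directly from the crate root instead of spelling out the submodule path. A minimal, self-contained sketch of the pattern; the `Sgd` struct and `Optimizer` trait below are stand-in stubs for illustration, not this crate's actual definitions:

```rust
// Self-contained illustration of the re-export pattern; the types here are
// stand-in stubs, not this crate's real definitions.
mod optim {
    pub mod sgd {
        pub struct Sgd {
            pub lr: f32,
        }
    }
    pub mod traits {
        pub trait Optimizer {
            fn step(&mut self);
        }
    }

    // Root-level re-exports, mirroring the list above: callers can write
    // `optim::Sgd` instead of `optim::sgd::Sgd`.
    pub use self::sgd::Sgd;
    pub use self::traits::Optimizer;
}

use optim::{Optimizer, Sgd}; // short paths via the re-exports

impl Optimizer for Sgd {
    fn step(&mut self) {
        // A real optimizer would update its parameters using self.lr here.
        let _ = self.lr;
    }
}

fn main() {
    let mut opt = Sgd { lr: 0.01 };
    opt.step();
}
```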
Modules
- adagrad - AdaGrad optimizer
- adamw - AdamW optimizer
- grad_accumulator - Gradient accumulation across micro-batches
- grad_clip - Gradient clipping utilities (a global-norm clipping sketch follows this list)
- grad_scaler - Dynamic loss scaling for FP16 mixed-precision training
- lamb - LAMB optimizer (Layer-wise Adaptive Moments for Batch training)
- lr_schedule - Learning rate schedulers
- sgd - SGD optimizer with momentum
- traits - Optimizer trait abstraction (see the SGD-with-momentum sketch after this list)
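For the grad_clip module, `clip_grad_norm` conventionally refers to clipping by the global L2 norm across all gradients. A standalone sketch of that technique under the usual definition; the function below is an illustrative assumption, not this crate's implementation:

```rust
/// Sketch of gradient clipping by global L2 norm: if the combined norm of all
/// gradients exceeds `max_norm`, every gradient is scaled by the same factor
/// so the combined norm is brought down to `max_norm`.
fn clip_by_global_norm(grads: &mut [Vec<f32>], max_norm: f32) -> f32 {
    let total_norm = grads
        .iter()
        .flat_map(|g| g.iter())
        .map(|x| x * x)
        .sum::<f32>()
        .sqrt();
    if total_norm > max_norm {
        let scale = max_norm / (total_norm + 1e-6); // small eps for stability
        for g in grads.iter_mut() {
            for x in g.iter_mut() {
                *x *= scale;
            }
        }
    }
    total_norm // returning the pre-clip norm is a common convention
}

fn main() {
    let mut grads = vec![vec![3.0_f32, 4.0], vec![0.0, 12.0]]; // global norm = 13
    let norm = clip_by_global_norm(&mut grads, 1.0);
    println!("pre-clip norm = {norm}, clipped = {grads:?}");
}
```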
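The traits and sgd modules are illustrated together below: a standalone sketch of an optimizer trait abstraction driving an SGD step with classical momentum. The trait shape, struct fields, and constructor are assumptions made for this example, not copied from this crate:

```rust
/// Illustrative optimizer trait (cf. the `traits` module); not the crate's API.
trait Optimizer {
    /// Apply one update step to `params` given their `grads`.
    fn step(&mut self, params: &mut [f32], grads: &[f32]);
}

/// Illustrative SGD-with-momentum state (cf. the `sgd` module).
struct Sgd {
    lr: f32,            // learning rate
    momentum: f32,      // momentum coefficient (0.0 disables momentum)
    velocity: Vec<f32>, // one velocity entry per parameter
}

impl Sgd {
    fn new(lr: f32, momentum: f32, n_params: usize) -> Self {
        Self { lr, momentum, velocity: vec![0.0; n_params] }
    }
}

impl Optimizer for Sgd {
    fn step(&mut self, params: &mut [f32], grads: &[f32]) {
        for ((p, g), v) in params.iter_mut().zip(grads).zip(&mut self.velocity) {
            // Classical momentum: v <- mu * v + g, then p <- p - lr * v.
            *v = self.momentum * *v + g;
            *p -= self.lr * *v;
        }
    }
}

fn main() {
    let mut params = vec![1.0_f32, -2.0, 0.5];
    let grads = vec![0.1_f32, -0.2, 0.05];
    let mut opt = Sgd::new(0.01, 0.9, params.len());
    opt.step(&mut params, &grads);
    println!("{params:?}");
}
```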