Struct AdamW

Source

pub struct AdamW<A: Float + ScalarOperand + Debug> { /* private fields */ }

Expand description

AdamW optimizer

Implements the AdamW optimization algorithm from the paper: “Decoupled Weight Decay Regularization” by Loshchilov and Hutter (2019).

AdamW uses a more principled approach to weight decay compared to standard Adam. The key difference is that weight decay is applied directly to the weights, not within the adaptive learning rate computation.

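Formula:

$$
\begin{aligned}
m_t &= \beta_1 \, m_{t-1} + (1 - \beta_1) \, g_t \\
v_t &= \beta_2 \, v_{t-1} + (1 - \beta_2) \, g_t^2 \\
\hat{m}_t &= \frac{m_t}{1 - \beta_1^t} \\
\hat{v}_t &= \frac{v_t}{1 - \beta_2^t} \\
\theta_t &= \theta_{t-1} \, (1 - \mathrm{lr} \cdot \mathrm{weight\_decay}) - \mathrm{lr} \cdot \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}
\end{aligned}
$$

Here $g_t$ is the gradient at step $t$, $m_t$ and $v_t$ are the first- and second-moment estimates, $\hat{m}_t$ and $\hat{v}_t$ are their bias-corrected values, and $\theta_t$ are the parameters.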

Note the decoupling of weight decay from the adaptive learning rate computation.

§Examples

use scirs2_core::ndarray::Array1;
use optirs_core::optimizers::{AdamW, Optimizer};

// Initialize parameters and gradients
let params = Array1::zeros(5);
let gradients = Array1::from_vec(vec![0.1, 0.2, -0.3, 0.0, 0.5]);

// Create an AdamW optimizer with default hyperparameters
let mut optimizer = AdamW::new(0.001);

// Update parameters
let new_params = optimizer.step(&params, &gradients).unwrap();

Struct AdamW Copy item path

§Examples

Implementations§

impl<A: Float + ScalarOperand + Debug + Send + Sync> AdamW<A>

pub fn new(learning_rate: A) -> Self

§Arguments

pub fn new_with_config( learning_rate: A, beta1: A, beta2: A, epsilon: A, weight_decay: A, ) -> Self

§Arguments

pub fn set_beta1(&mut self, beta1: A) -> &mut Self

pub fn get_beta1(&self) -> A

pub fn set_beta2(&mut self, beta2: A) -> &mut Self

pub fn get_beta2(&self) -> A

pub fn set_epsilon(&mut self, epsilon: A) -> &mut Self

pub fn get_epsilon(&self) -> A

pub fn set_weight_decay(&mut self, weight_decay: A) -> &mut Self

pub fn get_weight_decay(&self) -> A

pub fn learning_rate(&self) -> A

pub fn set_lr(&mut self, lr: A)

pub fn reset(&mut self)

Trait Implementations§

impl<A: Clone + Float + ScalarOperand + Debug> Clone for AdamW<A>

fn clone(&self) -> AdamW<A>

fn clone_from(&mut self, source: &Self)

impl<A: Debug + Float + ScalarOperand + Debug> Debug for AdamW<A>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<A, D> Optimizer<A, D> for AdamW<A> where A: Float + ScalarOperand + Debug + Send + Sync, D: Dimension,

fn step( &mut self, params: &Array<A, D>, gradients: &Array<A, D>, ) -> Result<Array<A, D>>

fn get_learning_rate(&self) -> A

fn set_learning_rate(&mut self, learning_rate: A)

fn step_list( &mut self, params_list: &[&Array<A, D>], gradients_list: &[&Array<A, D>], ) -> Result<Vec<Array<A, D>>>

Auto Trait Implementations§

impl<A> Freeze for AdamW<A> where A: Freeze,

impl<A> RefUnwindSafe for AdamW<A> where A: RefUnwindSafe,

impl<A> Send for AdamW<A> where A: Send,

impl<A> Sync for AdamW<A> where A: Sync,

impl<A> Unpin for AdamW<A> where A: Unpin,

impl<A> UnwindSafe for AdamW<A> where A: UnwindSafe + RefUnwindSafe,

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> DynClone for T where T: Clone,

fn __clone_box(&self, _: Private) -> *mut ()

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> Same for T

type Output = T

impl<SS, SP> SupersetOf<SS> for SP where SS: SubsetOf<SP>,

fn to_subset(&self) -> Option<SS>

fn is_in_subset(&self) -> bool

fn to_subset_unchecked(&self) -> SS

fn from_subset(element: &SS) -> SP

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

Struct AdamW

impl<A, D> Optimizer<A, D> for AdamW<A>
where A: Float + ScalarOperand + Debug + Send + Sync, D: Dimension,

impl<A> Freeze for AdamW<A>
where A: Freeze,

impl<A> RefUnwindSafe for AdamW<A>
where A: RefUnwindSafe,

impl<A> Send for AdamW<A>
where A: Send,

impl<A> Sync for AdamW<A>
where A: Sync,

impl<A> Unpin for AdamW<A>
where A: Unpin,

impl<A> UnwindSafe for AdamW<A>
where A: UnwindSafe + RefUnwindSafe,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> DynClone for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,