Enum AdjustLrFn

Source

/// Learning rate adjustment method for the Muon optimizer.
///
/// Each variant selects a formula that scales the base learning rate `lr`
/// from the shape `[A, B]` of the parameter matrix.
pub enum AdjustLrFn {
    /// Keller Jordan's original method: `lr * sqrt(max(1, A/B))`.
    Original,
    /// Moonshot's method: `lr * 0.2 * sqrt(max(A, B))`, designed to match AdamW's RMS.
    MatchRmsAdamW,
}

Expand description

Learning rate adjustment method for the Muon optimizer.

Muon adjusts the learning rate based on parameter shape to maintain consistent RMS across rectangular matrices.

§References

Original: Muon: An optimizer for hidden layers
Moonshot: Muon is Scalable for LLM Training

Variants§

§

Original

Keller Jordan’s original method: lr * sqrt(max(1, A/B)), where A is the number of rows and B the number of columns of the weight matrix.

This scales the learning rate by the square root of the aspect ratio of the weight matrix, so that tall matrices (more rows than columns) get larger learning rates, while square and wide matrices keep the base learning rate (the max(1, ·) clamps the factor at 1).

§Example

For a [1024, 512] matrix: lr * sqrt(1024/512) = lr * sqrt(2) ≈ lr * 1.414

§

MatchRmsAdamW

Moonshot’s method: lr * 0.2 * sqrt(max(A, B))

This method is designed to match AdamW’s RMS, allowing Muon to directly reuse learning rates and weight decay values tuned for AdamW without retuning.

§Example

For a [1024, 512] matrix: lr * 0.2 * sqrt(max(1024, 512)) = lr * 0.2 * 32 = lr * 6.4

Enum AdjustLrFn Copy item path

§References

Variants§

Original

§Example

MatchRmsAdamW

§Example

Trait Implementations§

impl Clone for AdjustLrFn

fn clone(&self) -> AdjustLrFn

fn clone_from(&mut self, source: &Self)

impl Debug for AdjustLrFn

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for AdjustLrFn

fn default() -> AdjustLrFn

impl<'de> Deserialize<'de> for AdjustLrFn

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl PartialEq for AdjustLrFn

fn eq(&self, other: &AdjustLrFn) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Serialize for AdjustLrFn

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

impl Copy for AdjustLrFn

impl Eq for AdjustLrFn

impl StructuralPartialEq for AdjustLrFn

Auto Trait Implementations§

impl Freeze for AdjustLrFn

impl RefUnwindSafe for AdjustLrFn

impl Send for AdjustLrFn

impl Sync for AdjustLrFn

impl Unpin for AdjustLrFn

impl UnwindSafe for AdjustLrFn

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<Q, K> Equivalent<K> for Q where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

fn equivalent(&self, key: &K) -> bool

impl<Q, K> Equivalent<K> for Q where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

fn equivalent(&self, key: &K) -> bool

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Enum AdjustLrFn

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<Q, K> Equivalent<K> for Q
where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

impl<Q, K> Equivalent<K> for Q
where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,