Struct SoftmaxAssignmentSparsityPenalty

Source

/// Entropy sparsity penalty over row-wise softmax assignment logits.
///
/// The accompanying description states that the target is a flat row-major
/// (N, K) logit matrix, that assignments are
/// `a_i = softmax(logits_i / temperature)`, and that the penalty is
/// `lambda_sparse * sum_i H(a_i)` with `H(a_i) = -sum_k a_ik log a_ik`.
pub struct SoftmaxAssignmentSparsityPenalty {
    /// Number of atoms; presumably K, the row width of the (N, K) logit
    /// matrix — TODO confirm against the implementation.
    pub k_atoms: usize,
    /// Softmax temperature: each row of logits is divided by this value
    /// before the softmax is applied.
    pub temperature: f64,
    /// Penalty weight; presumably the `lambda_sparse` multiplier in the
    /// penalty formula — TODO confirm.
    pub weight: f64,
    /// Optional schedule for the weight; presumably consumed by
    /// `apply_schedule(iter)` to update `weight` per iteration — TODO confirm.
    pub weight_schedule: Option<ScalarWeightSchedule>,
}

Expand description

Entropy sparsity over row-wise softmax assignment logits.

This is the SAE-manifold soft-assignment penalty. The target is a flat row-major (N, K) logit matrix. Assignments are a_i = softmax(logits_i / temperature), and the penalty is

  lambda_sparse * sum_i H(a_i)
  H(a_i) = -sum_k a_ik log a_ik

Minimizing entropy drives each row toward a small active support while the softmax keeps a_ik >= 0 and sum_k a_ik = 1. The exact Hessian is dense in each row and can be indefinite because entropy is concave in assignment space, so callers must use the HVP rather than a diagonal Hessian shortcut.

Fields§

§k_atoms: usize
§temperature: f64
§weight: f64
§weight_schedule: Option<ScalarWeightSchedule>

Struct SoftmaxAssignmentSparsityPenalty Copy item path

Fields§

Implementations§

impl SoftmaxAssignmentSparsityPenalty

pub fn new(k_atoms: usize, temperature: f64) -> Self

pub fn with_weight_schedule(self, schedule: ScalarWeightSchedule) -> Self

pub fn psd_majorizer_abs_row_sums(&self, row: &[f64], scale: f64) -> Vec<f64>

pub fn row_dense_hessian(&self, row_logits: &[f64], scale: f64) -> Array2<f64>

pub fn row_dense_hessian_logit_derivative( &self, row_logits: &[f64], scale: f64, w: usize, ) -> Array2<f64>

pub fn row_psd_majorizer(&self, row_logits: &[f64], scale: f64) -> Array2<f64>

pub fn row_psd_majorizer_logit_derivative( &self, row_logits: &[f64], scale: f64, w: usize, ) -> Array2<f64>

pub fn row_fisher_metric(&self, row_logits: &[f64], scale: f64) -> Array2<f64>

pub fn row_fisher_metric_logit_derivative( &self, row_logits: &[f64], scale: f64, w: usize, ) -> Array2<f64>

Trait Implementations§

impl AnalyticPenalty for SoftmaxAssignmentSparsityPenalty

fn tier(&self) -> PenaltyTier

fn value(&self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>) -> f64

fn grad_target( &self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>, ) -> Array1<f64>

fn hessian_diag( &self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>, ) -> Option<Array1<f64>>

fn hvp( &self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>, v: ArrayView1<'_, f64>, ) -> Array1<f64>

fn psd_majorizer_diag( &self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>, ) -> Option<Array1<f64>>

fn grad_rho( &self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>, ) -> Array1<f64>

fn rho_count(&self) -> usize

fn name(&self) -> &str

fn apply_schedule(&mut self, iter: usize)

fn psd_majorizer_hvp( &self, target: ArrayView1<'_, f64>, rho: ArrayView1<'_, f64>, v: ArrayView1<'_, f64>, ) -> Array1<f64>

impl Clone for SoftmaxAssignmentSparsityPenalty

fn clone(&self) -> SoftmaxAssignmentSparsityPenalty

fn clone_from(&mut self, source: &Self)

impl Debug for SoftmaxAssignmentSparsityPenalty

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl PenaltyManifest for SoftmaxAssignmentSparsityPenalty

const KIND_TAG: &'static str = "softmax_assignment_sparsity"

const PYTHON_WRAPPER: &'static str = "SoftmaxAssignmentSparsityPenalty"

const ROW_BLOCK_DIAGONAL: bool = true

fn dispatch_tier(&self) -> PenaltyTier

Auto Trait Implementations§

impl Freeze for SoftmaxAssignmentSparsityPenalty

impl RefUnwindSafe for SoftmaxAssignmentSparsityPenalty

impl Send for SoftmaxAssignmentSparsityPenalty

impl Sync for SoftmaxAssignmentSparsityPenalty

impl Unpin for SoftmaxAssignmentSparsityPenalty

impl UnsafeUnpin for SoftmaxAssignmentSparsityPenalty

impl UnwindSafe for SoftmaxAssignmentSparsityPenalty

Blanket Implementations§

impl<T> Allocation for T where T: RefUnwindSafe + Send + Sync,

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> ByRef<T> for T

fn by_ref(&self) -> &T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> DistributionExt for T where T: ?Sized,

fn rand<T>(&self, rng: &mut (impl Rng + ?Sized)) -> T where Self: Distribution<T>,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Imply<T> for U where T: ?Sized, U: ?Sized,

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T> Same for T

type Output = T

impl<SS, SP> SupersetOf<SS> for SP where SS: SubsetOf<SP>,

fn to_subset(&self) -> Option<SS>

Struct SoftmaxAssignmentSparsityPenalty

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> DistributionExt for T
where T: ?Sized,

fn rand<T>(&self, rng: &mut (impl Rng + ?Sized)) -> T
where Self: Distribution<T>,

impl<T, U> Imply<T> for U
where T: ?Sized, U: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,