zilla-muf 0.1.1

Shared structured-matrix and numerical primitives for sparse attention and state space models (SSMs).
Documentation
use num_traits::Float;

/// Logistic sigmoid, `σ(x) = 1 / (1 + e^{-x})`.
///
/// Mathematically the output lies in the open interval (0, 1); it is used
/// as a gate on its own and as the factor that SiLU multiplies its input by.
/// The curve saturates smoothly: `σ(50) ≈ 1` and `σ(-50) ≈ 0`.
///
/// This is the textbook formula with no sign-dependent branching. For very
/// large negative `x` the term `e^{-x}` overflows to `+∞`, so the quotient
/// collapses to exactly 0 — the correct limit, hence harmless. A hardened
/// variant that branches on the sign of `x` would only be needed if that
/// overflow ever had to be avoided.
pub fn sigmoid<T: Float>(x: T) -> T {
	let unit = T::one();
	// e^{-x}: tends to 0 for large positive `x`, grows without bound for
	// large negative `x`.
	let decay = (-x).exp();
	unit / (unit + decay)
}

/// SiLU / Swish activation: `x · σ(x)`.
///
/// Mamba's gating nonlinearity. Unlike ReLU it's smooth and
/// non-monotonic (it dips slightly below zero for small negative `x`
/// before recovering toward 0), which tends to help gradient flow.
///
/// # Edge cases
///
/// - `silu(0) = 0`: the leading `x` factor zeroes the product regardless
///   of `σ(0) = 0.5`.
/// - `silu(+∞) = +∞`, and NaN inputs propagate as NaN.
/// - `silu(-∞)` returns `-0.0`, the limit of `x · σ(x)` as `x → -∞`. The
///   naive product would be `-∞ · 0`, which IEEE-754 defines as NaN.
pub fn silu<T: Float>(x: T) -> T {
	// σ(-∞) is exactly 0, so multiplying would yield NaN. Return the limit
	// instead; negative zero matches what large finite negative inputs
	// already produce once their gate underflows to 0.
	if x == T::neg_infinity() {
		return T::neg_zero();
	}
	x * sigmoid(x)
}

#[cfg(test)]
mod tests {
	use super::*;

	/// Absolute tolerance for results expected to agree up to rounding error.
	const TOLERANCE: f64 = 1e-12;

	/// σ(0) sits at the midpoint of the output range.
	#[test]
	fn sigmoid_at_zero_is_one_half() {
		let deviation = (sigmoid(0.0_f64) - 0.5).abs();
		assert!(deviation < TOLERANCE);
	}

	/// Far to the right the sigmoid is indistinguishable from 1.
	#[test]
	fn sigmoid_saturates_for_large_positive_input() {
		let upper_tail = sigmoid(50.0_f64);
		assert!(upper_tail > 0.999999);
	}

	/// Far to the left the sigmoid is indistinguishable from 0.
	#[test]
	fn sigmoid_saturates_for_large_negative_input() {
		let lower_tail = sigmoid(-50.0_f64);
		assert!(lower_tail < 0.000001);
	}

	/// The leading `x` factor forces SiLU through the origin.
	#[test]
	fn silu_at_zero_is_zero() {
		let at_origin = silu(0.0_f64);
		assert_eq!(at_origin, 0.0);
	}

	/// SiLU agrees with `x · σ(x)` written out by hand.
	#[test]
	fn silu_matches_definition() {
		let x = 2.0_f64;
		let gate = 1.0 / (1.0 + (-x).exp());
		let expected = x * gate;
		assert!((silu(x) - expected).abs() < TOLERANCE);
	}
}