Skip to main content

ruvector_attention/info_geometry/
mod.rs

1//! Information Geometry for Attention
2//!
3//! Natural gradient methods using Fisher information metric.
4//!
5//! ## Key Concepts
6//!
7//! 1. **Fisher Metric**: F = diag(p) - p*p^T on the probability simplex
8//! 2. **Natural Gradient**: Solve F*delta = grad, then update params -= lr*delta
9//! 3. **Conjugate Gradient**: Efficient solver for the Fisher system
10//!
11//! ## Use Cases
12//!
13//! - Training attention weights with proper geometry
14//! - Routing probabilities in MoE
15//! - Softmax logit optimization
16
17mod fisher;
18mod natural_gradient;
19
20pub use fisher::{FisherConfig, FisherMetric};
21pub use natural_gradient::{NaturalGradient, NaturalGradientConfig};
22
#[cfg(test)]
mod tests {
    /// Build smoke test for this module.
    ///
    /// The body is intentionally empty: the test passes whenever the module
    /// compiles under `cfg(test)`. A constant assertion such as
    /// `assert!(true)` adds no coverage and is rejected by Clippy's
    /// `assertions_on_constants` lint, so none is used here.
    #[test]
    fn test_module_exists() {}
}