1pub mod attention;
48pub mod config;
49pub mod error;
50pub mod graph;
51pub mod hyperbolic;
52pub mod moe;
53pub mod sdk;
54pub mod sparse;
55pub mod training;
56pub mod traits;
57pub mod utils;
58
59pub mod curvature;
61pub mod topology;
62pub mod transport;
63
64pub mod info_bottleneck;
66pub mod info_geometry;
67pub mod pde_attention;
68pub mod unified_report;
69
70#[cfg(feature = "sheaf")]
72pub mod sheaf;
73
74pub use attention::{MLACache, MLAConfig, MLALayer, MemoryComparison};
76pub use attention::{MultiHeadAttention, ScaledDotProductAttention};
77pub use config::{AttentionConfig, GraphAttentionConfig, SparseAttentionConfig};
78pub use error::{AttentionError, AttentionResult};
79pub use hyperbolic::{
80 exp_map, log_map, mobius_add, poincare_distance, project_to_ball, HyperbolicAttention,
81 HyperbolicAttentionConfig, MixedCurvatureAttention, MixedCurvatureConfig,
82};
83pub use traits::{
84 Attention, EdgeInfo, GeometricAttention, Gradients, GraphAttention, SparseAttention,
85 SparseMask, TrainableAttention,
86};
87
88pub use sparse::{
90 AttentionMask, FlashAttention, LinearAttention, LocalGlobalAttention, SparseMaskBuilder,
91};
92
93pub use moe::{
95 Expert, ExpertType, HyperbolicExpert, LearnedRouter, LinearExpert, MoEAttention, MoEConfig,
96 Router, StandardExpert, TopKRouting,
97};
98
99pub use graph::{
101 DualSpaceAttention, DualSpaceConfig, EdgeFeaturedAttention, EdgeFeaturedConfig, GraphRoPE,
102 RoPEConfig,
103};
104
105pub use training::{
107 Adam, AdamW, CurriculumScheduler, CurriculumStage, DecayType, HardNegativeMiner, InfoNCELoss,
108 LocalContrastiveLoss, Loss, MiningStrategy, NegativeMiner, Optimizer, Reduction,
109 SpectralRegularization, TemperatureAnnealing, SGD,
110};
111
112pub use sdk::{presets, AttentionBuilder, AttentionPipeline};
114
115pub use transport::{
117 CentroidCache, CentroidOTAttention, CentroidOTConfig, ProjectionCache,
118 SlicedWassersteinAttention, SlicedWassersteinConfig, WindowCache,
119};
120
121pub use curvature::{
123 ComponentQuantizer, FusedCurvatureConfig, MixedCurvatureCache, MixedCurvatureFusedAttention,
124 QuantizationConfig, QuantizedVector, TangentSpaceConfig, TangentSpaceMapper,
125};
126
127pub use topology::{
129 AttentionMode, AttentionPolicy, CoherenceMetric, PolicyConfig, TopologyGatedAttention,
130 TopologyGatedConfig, WindowCoherence,
131};
132
133pub use info_geometry::{FisherConfig, FisherMetric, NaturalGradient, NaturalGradientConfig};
135
136pub use info_bottleneck::{DiagonalGaussian, IBConfig, InformationBottleneck, KLDivergence};
138
139pub use pde_attention::{DiffusionAttention, DiffusionConfig, GraphLaplacian, LaplacianType};
141
142#[cfg(feature = "sheaf")]
144pub use sheaf::{
145 process_with_early_exit, ComputeLane, EarlyExit, EarlyExitConfig, EarlyExitResult,
146 EarlyExitStatistics, ExitReason, LaneStatistics, ResidualSparseMask, RestrictionMap,
147 RestrictionMapConfig, RoutingDecision, SheafAttention, SheafAttentionConfig,
148 SparseResidualAttention, SparseResidualConfig, SparsityStatistics, TokenRouter,
149 TokenRouterConfig,
150};
151
152pub use unified_report::{
154 AttentionRecommendation, GeometryReport, MetricType, MetricValue, ReportBuilder, ReportConfig,
155};
156
/// Version string of this crate.
///
/// Expanded at compile time by `env!` from the `CARGO_PKG_VERSION`
/// environment variable, so the build fails if that variable is not set.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
159
#[cfg(test)]
mod tests {
    use super::*;

    /// The compile-time version constant must not be an empty string.
    #[test]
    fn test_version() {
        assert!(!VERSION.is_empty());
    }

    /// Builds a configuration with `dim = 64` and `num_heads = 4` through the
    /// builder and checks that the stored fields and the reported `head_dim()`
    /// (expected to be 16) match.
    #[test]
    fn test_basic_attention_workflow() {
        // Configure the builder first, then finalize it in a separate step.
        let builder = AttentionConfig::builder().dim(64).num_heads(4);
        let attention_cfg = builder.build().unwrap();

        assert_eq!(attention_cfg.dim, 64);
        assert_eq!(attention_cfg.num_heads, 4);
        assert_eq!(attention_cfg.head_dim(), 16);
    }
}