// --- Core modules -----------------------------------------------------------
// Fundamental attention mechanisms, configuration, errors, and shared traits.
pub mod attention;
pub mod config;
pub mod error;
pub mod graph;
pub mod hyperbolic;
pub mod moe;
pub mod sdk;
pub mod sparse;
pub mod training;
pub mod traits;
pub mod utils;

// --- Geometric extensions ---------------------------------------------------
// Curvature-aware fusion, topology gating, and optimal-transport attention.
pub mod curvature;
pub mod topology;
pub mod transport;

// --- Information-theoretic and PDE-based variants ---------------------------
pub mod info_bottleneck;
pub mod info_geometry;
pub mod pde_attention;
pub mod unified_report;

// Sheaf-theoretic attention is opt-in via the `sheaf` cargo feature.
#[cfg(feature = "sheaf")]
pub mod sheaf;
69
// --- Public re-exports ------------------------------------------------------
// Flat convenience surface: the most commonly used types from each module are
// re-exported at the crate root so downstream code can `use <crate>::Foo`.

// Core attention layers (MLA + classic scaled dot-product / multi-head).
pub use attention::{MLACache, MLAConfig, MLALayer, MemoryComparison};
pub use attention::{MultiHeadAttention, ScaledDotProductAttention};
// Configuration and error types shared across all mechanisms.
pub use config::{AttentionConfig, GraphAttentionConfig, SparseAttentionConfig};
pub use error::{AttentionError, AttentionResult};
// Hyperbolic geometry primitives (Poincaré-ball ops) plus the attention
// layers built on them.
pub use hyperbolic::{
    exp_map, log_map, mobius_add, poincare_distance, project_to_ball, HyperbolicAttention,
    HyperbolicAttentionConfig, MixedCurvatureAttention, MixedCurvatureConfig,
};
// Trait vocabulary implemented by the concrete attention types above.
pub use traits::{
    Attention, EdgeInfo, GeometricAttention, Gradients, GraphAttention, SparseAttention,
    SparseMask, TrainableAttention,
};

// Sparse / sub-quadratic attention variants.
pub use sparse::{
    AttentionMask, FlashAttention, LinearAttention, LocalGlobalAttention, SparseMaskBuilder,
};

// Mixture-of-experts attention: experts, routers, and routing strategies.
pub use moe::{
    Expert, ExpertType, HyperbolicExpert, LearnedRouter, LinearExpert, MoEAttention, MoEConfig,
    Router, StandardExpert, TopKRouting,
};

// Graph attention with edge features, dual-space projection, and graph RoPE.
pub use graph::{
    DualSpaceAttention, DualSpaceConfig, EdgeFeaturedAttention, EdgeFeaturedConfig, GraphRoPE,
    RoPEConfig,
};

// Training utilities: optimizers, contrastive losses, mining, scheduling,
// and regularization.
pub use training::{
    Adam, AdamW, CurriculumScheduler, CurriculumStage, DecayType, HardNegativeMiner, InfoNCELoss,
    LocalContrastiveLoss, Loss, MiningStrategy, NegativeMiner, Optimizer, Reduction,
    SpectralRegularization, TemperatureAnnealing, SGD,
};

// High-level builder/pipeline SDK and ready-made presets.
pub use sdk::{presets, AttentionBuilder, AttentionPipeline};

// Optimal-transport attention (sliced Wasserstein / centroid OT) with caches.
pub use transport::{
    CentroidCache, CentroidOTAttention, CentroidOTConfig, ProjectionCache,
    SlicedWassersteinAttention, SlicedWassersteinConfig, WindowCache,
};

// Fused mixed-curvature attention with quantization and tangent-space maps.
pub use curvature::{
    ComponentQuantizer, FusedCurvatureConfig, MixedCurvatureCache, MixedCurvatureFusedAttention,
    QuantizationConfig, QuantizedVector, TangentSpaceConfig, TangentSpaceMapper,
};

// Topology-gated attention driven by window-coherence metrics.
pub use topology::{
    AttentionMode, AttentionPolicy, CoherenceMetric, PolicyConfig, TopologyGatedAttention,
    TopologyGatedConfig, WindowCoherence,
};

// Information geometry (Fisher metric, natural gradient).
pub use info_geometry::{FisherConfig, FisherMetric, NaturalGradient, NaturalGradientConfig};

// Information-bottleneck regularization components.
pub use info_bottleneck::{DiagonalGaussian, IBConfig, InformationBottleneck, KLDivergence};

// PDE-inspired diffusion attention over graph Laplacians.
pub use pde_attention::{DiffusionAttention, DiffusionConfig, GraphLaplacian, LaplacianType};

// Sheaf attention surface — only available with the `sheaf` feature enabled.
#[cfg(feature = "sheaf")]
pub use sheaf::{
    process_with_early_exit, ComputeLane, EarlyExit, EarlyExitConfig, EarlyExitResult,
    EarlyExitStatistics, ExitReason, LaneStatistics, ResidualSparseMask, RestrictionMap,
    RestrictionMapConfig, RoutingDecision, SheafAttention, SheafAttentionConfig,
    SparseResidualAttention, SparseResidualConfig, SparsityStatistics, TokenRouter,
    TokenRouterConfig,
};

// Unified geometry reporting: metrics, recommendations, and report builders.
pub use unified_report::{
    AttentionRecommendation, GeometryReport, MetricType, MetricValue, ReportBuilder, ReportConfig,
};
152
/// Crate version string, captured from `Cargo.toml` at compile time via
/// the `CARGO_PKG_VERSION` environment variable.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
155
#[cfg(test)]
mod tests {
    use super::*;

    /// The compile-time crate version must be a non-empty string.
    #[test]
    fn test_version() {
        let version = VERSION;
        assert!(!version.is_empty());
    }

    /// Smoke test for the config builder: a 64-dim, 4-head configuration
    /// should round-trip its fields and derive a per-head dim of 64 / 4.
    #[test]
    fn test_basic_attention_workflow() {
        let built = AttentionConfig::builder().dim(64).num_heads(4).build();
        let config = built.unwrap();

        let expected_head_dim = 64 / 4;
        assert_eq!(config.dim, 64);
        assert_eq!(config.num_heads, 4);
        assert_eq!(config.head_dim(), expected_head_dim);
    }
}