boostr 0.1.0

An ML framework built on numr, providing attention mechanisms, quantization, and model architectures.
Documentation
// Crate root: declares the public module tree, then flattens the primary
// traits/types into the crate root so downstream code can write
// `boostr::AttentionOps` instead of `boostr::attention::AttentionOps`.
//
// NOTE(review): module descriptions below are inferred from the module and
// re-exported item names only — confirm against each module's contents.

/// Model-architecture operations (exports `MoEOps`; presumably
/// mixture-of-experts related — confirm in module).
pub mod architecture;
/// Attention operations: standard, flash, fused-QKV, MLA, paged, and
/// variable-length variants (per the trait names re-exported below).
pub mod attention;
/// KV-cache management and quantization (int4 group sizing, quant modes).
pub mod cache;
/// Inference-time utilities: grammar-constrained decoding (DFA-based),
/// sampling, and speculative decoding.
pub mod inference;
/// Positional-encoding operations (ALiBi, RoPE).
pub mod position;
/// Quantization support (exports `CalibrationOps`).
pub mod quantization;
/// Training support: fused FP8 training and fused optimizer operations.
pub mod training;

// Re-exports: one `pub use` per module, keeping the crate-root API surface
// in sync with the module list above.
pub use architecture::MoEOps;
pub use attention::{
    AttentionOps, FlashAttentionOps, FusedQkvOps, MlaOps, PagedAttentionOps, VarLenAttentionOps,
};
pub use cache::{Int4GroupSize, KvCacheOps, KvCacheQuantOps, KvQuantMode};
pub use inference::{DeviceGrammarDfa, GrammarDfaOps, SamplingOps, SpeculativeOps};
pub use position::{AlibiOps, RoPEOps};
pub use quantization::CalibrationOps;
pub use training::{FusedFp8TrainingOps, FusedOptimizerOps};