boostr 0.1.0

ML framework built on numr - attention, quantization, model architectures
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Public submodules declared by this module. Each `pub mod` line makes the
// submodule of that name part of this module's public path.
pub mod architecture;
pub mod attention;
pub mod inference;
pub mod quantization;
pub mod training;

// Flat re-exports: the items listed below are reachable directly from this
// module, so callers do not have to name the submodule that defines them.

// From `architecture`: the `moe_*` items.
// NOTE(review): the names suggest mixture-of-experts helpers (top-k routing,
// token permute/unpermute, grouped GEMM) — confirm against the submodule.
pub use architecture::{
    moe_grouped_gemm_fused_impl, moe_grouped_gemm_impl, moe_permute_tokens_impl,
    moe_top_k_routing_impl, moe_unpermute_tokens_impl,
};
// From `attention`: the `apply_rope*` variants and the two attention items.
// NOTE(review): presumably rotary position embedding plus multi-head and
// scaled dot-product attention — confirm against the submodule.
pub use attention::{
    apply_rope_impl, apply_rope_interleaved_impl, apply_rope_yarn_impl, multi_head_attention_impl,
    scaled_dot_product_attention_impl,
};
// From `inference`: acceptance-probability, expected-token, and
// speculative-token verification items.
// NOTE(review): looks like speculative-decoding support — confirm.
pub use inference::{
    compute_acceptance_probs_impl, compute_expected_tokens_impl, verify_speculative_tokens_impl,
};