gllm_kernels/lib.rs

//! gllm-kernels: low-level attention kernels built on Burn.
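//!
//! A minimal usage sketch (illustrative only; it assumes `default_device()`
//! takes no arguments and that `FlashAttentionConfig` implements `Default`,
//! so the block is marked `ignore` rather than compiled as a doctest):
//!
//! ```ignore
//! use gllm_kernels::{default_device, FlashAttentionConfig, VERSION};
//!
//! // VERSION is defined at the bottom of this file from Cargo package metadata.
//! println!("gllm-kernels {VERSION}");
//!
//! // Assumed API shape: pick the default compute device and build a
//! // default flash-attention configuration for it.
//! let _device = default_device();
//! let _config = FlashAttentionConfig::default();
//! ```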

pub mod backend;
pub mod comm;
// Vendor-specific GPU kernels, compiled only when the matching feature is enabled.
#[cfg(feature = "cuda-kernel")]
pub mod cuda_kernels;
#[cfg(feature = "rocm-kernel")]
pub mod hip_kernels;
pub mod device;
pub mod ops;
pub mod types;

pub use backend::{select_device, DefaultBackend};
pub use comm::{CommError, CommResult, Communicator, SharedMemoryComm, SharedMemoryGroup, TcpComm};
#[cfg(feature = "cuda-kernel")]
pub use cuda_kernels::{FlashAttentionError, FlashAttentionKernel, OptimizedCudaAttention};
#[cfg(feature = "rocm-kernel")]
pub use hip_kernels::{
    FlashAttentionError as HipFlashAttentionError, FlashAttentionKernel as HipFlashAttentionKernel,
    OptimizedHipAttention,
};
pub use device::{default_device, DefaultDevice};
pub use ops::flash_attention::{
    AttentionWorkspace, FlashAttentionConfig, FusedPagedAttention, HierarchicalFlashAttention,
};
pub use ops::flash_attention_v3::{FlashAttention3, FlashAttention3Config};
pub use ops::kv_compression::{CompressedKV, CompressionMethod, KVCacheCompressor, KVLayout};
pub use ops::mamba::{
    HybridLayer, HybridStrategy, MambaBlock, MambaConfig, MambaParameters, MambaState,
};
pub use ops::mla::{CompressedKVCache, MultiHeadLatentAttention};
pub use ops::paged_attention::{
    BlockManager, BlockTable, KVBlock, KVBlockIterator, KVBlockRef, PagedAttention, PagedKVCache,
};
pub use ops::ring_attention::{CommBackend, RingAttention, RingAttentionConfig};
pub use ops::softmax::{log_add_exp, log_sum_exp, log_sum_exp_kahan, LogSpaceSoftmax};
pub use ops::sparse_attention::{
    SparseAttention, SparseAttentionConfig, SparseSelection, SparsityPattern,
};
pub use ops::speculative_decoding::{
    PredictionConfig, PredictionHeadType, SpeculativeCandidates, SpeculativeDecoder,
    SpeculativeToken, SpeculativeTree, SpeculativeVerification, TreeConfig, VerificationStrategy,
};
pub use ops::stable_accumulator::{
    AccumulatorConfig, HierarchicalAccumulator, KahanAccumulator, KahanSum, OutputAccumulator,
    StableAccumulator, StableRowState,
};
pub use types::{AttentionConfig, KernelPrecision, PagedAttentionConfig};

/// Library version, taken from `CARGO_PKG_VERSION` at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
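
// A small sanity-check sketch: Cargo always populates `CARGO_PKG_VERSION`,
// so the constant above should never be empty.
#[cfg(test)]
mod tests {
    use super::VERSION;

    #[test]
    fn version_is_populated() {
        assert!(!VERSION.is_empty());
    }
}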