gllm-kernels: low-level attention kernels built on Burn.
Re-exports
- pub use backend::{select_device, DefaultBackend};
- pub use comm::{CommError, CommResult, Communicator, TcpComm};
- pub use device::{default_device, DefaultDevice};
- pub use ops::flash_attention::{AttentionWorkspace, FlashAttentionConfig, FusedPagedAttention, HierarchicalFlashAttention};
- pub use ops::flash_attention_v3::{FlashAttention3, FlashAttention3Config};
- pub use ops::kv_compression::{CompressedKV, CompressionMethod, KVCacheCompressor, KVLayout};
- pub use ops::mamba::{HybridLayer, HybridStrategy, MambaBlock, MambaConfig, MambaParameters, MambaState};
- pub use ops::mla::{CompressedKVCache, MultiHeadLatentAttention};
- pub use ops::paged_attention::{BlockManager, BlockTable, KVBlock, KVBlockIterator, KVBlockRef, PagedAttention, PagedKVCache};
- pub use ops::ring_attention::{CommBackend, RingAttention, RingAttentionConfig};
- pub use ops::speculative_decoding::{PredictionConfig, PredictionHeadType, SpeculativeCandidates, SpeculativeDecoder, SpeculativeToken, SpeculativeTree, SpeculativeVerification, TreeConfig, VerificationStrategy};
- pub use ops::sparse_attention::{SparseAttention, SparseAttentionConfig, SparseSelection, SparsityPattern};
- pub use ops::softmax::{log_add_exp, log_sum_exp, log_sum_exp_kahan, LogSpaceSoftmax};
- pub use ops::stable_accumulator::{AccumulatorConfig, HierarchicalAccumulator, KahanAccumulator, KahanSum, OutputAccumulator, StableAccumulator, StableRowState};
- pub use types::{AttentionConfig, KernelPrecision, PagedAttentionConfig};
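Every item above is re-exported at the crate root, so downstream code can import it either directly from gllm_kernels or through its defining module. A minimal sketch of both import styles, using only paths taken from the re-export list (no constructor or method signatures are assumed):

```rust
// Root-level imports via the crate's re-exports.
use gllm_kernels::{FlashAttentionConfig, PagedAttention, RingAttentionConfig};

// The same items through their full module paths; renamed with `as`
// only to avoid clashing with the root-level imports above.
use gllm_kernels::ops::flash_attention::FlashAttentionConfig as FlashCfg;
use gllm_kernels::ops::paged_attention::PagedAttention as Paged;
use gllm_kernels::ops::ring_attention::RingAttentionConfig as RingCfg;

fn main() {
    // Nothing to execute: this example only demonstrates import paths.
}
```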
Modules

- backend: Backend selection helpers (select_device, DefaultBackend).
- comm: Communication backends for ring attention.
- device: Device helpers using Burn backends.
- ops: Kernel implementations: flash, paged, ring, and sparse attention, MLA, Mamba, speculative decoding, KV-cache compression, and numerically stable softmax and accumulators.
- types: Types and configuration for attention operations.
Constants

- VERSION: Library version.
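A tiny sketch of reading the version constant; the entry above only says it is the library version, so the example assumes VERSION implements Display (for instance a &'static str, the common convention for such constants):

```rust
fn main() {
    // Assumption: VERSION is Display (typically a &'static str); the
    // rustdoc entry only states that it is the library version.
    println!("gllm-kernels v{}", gllm_kernels::VERSION);
}
```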