List of all items
Structs
- attention::AttentionParams
- attention::FusedTransformer
- attention::LayerWeights
- attention::TransformerConfig
- attention::cpu::transformer::CpuKvCache
- backend::AttnConfig
- backend::KvBf16
- backend::KvCache
- backend::KvCacheQuant
- backend::KvFp16
- backend::KvFp8
- backend::KvInt8
- backend::MoeRouting
- backend::QuantWeights
- backend::cpu::CpuBackend
- backend::cpu::CpuGptqStore
- backend::timer::CpuTimer
- quant_linear::cpu_dequant::CpuGptqLinear
- quant_linear::cpu_gguf::CpuGgufLinear
- quant_linear::cpu_marlin_stack::CpuMarlinExpertStack
Enums
- backend::GgufQuantType
- backend::QuantKind
- backend::ReduceOp
- backend::SrcDtype
- backend::buffer::CpuBuf
- backend::cpu::CpuQuantStore
- backend::dtype::Dtype
Traits
- backend::Backend
- backend::BackendCollective
- backend::BackendGraph
- backend::BackendInt8KvOps
- backend::BackendKvDtype
- backend::BackendMoeFused
- backend::BackendPagedKv
- backend::BackendQuantGguf
- backend::BackendQuantMarlin
- backend::KvDtypeKind
- backend::KvLayer
- backend::LlmBackend
- backend::MoeLlmBackend
- backend::QuantLlmBackend
- backend::dtype::HostDtype
- backend::timer::BackendTimer
- linear::Linear
- marlin_expert_stack::MarlinExpertStack
- stacked_expert::StackedExpertGgufLinear
Functions
- attention::attention
- attention::attention_cpu
- attention::cpu::fused_attention
- attention::cpu::gemm_at_b
- attention::cpu::softmax_inplace
- attention::cpu::transformer::cpu_layer_forward
- backend::timer::finish_probe_timer
- backend::timer::finish_probe_timer_traced
- backend::timer::start_probe_timer_if
- configure_native_profile_sink
- moe_host::compute_ids_tpe