pub fn configure_native_profile_sink(
config: &ferrum_bench_core::ProfileSinkConfig,
) -> std::io::Result<()> {
#[cfg(all(feature = "cuda", feature = "vllm-moe-marlin"))]
backend::cuda::marlin::configure_vllm_moe_profile_sink(config)?;
#[cfg(not(all(feature = "cuda", feature = "vllm-moe-marlin")))]
let _ = config;
Ok(())
}
pub mod backend;
pub mod linear;
pub use linear::Linear;
pub mod stacked_expert;
pub use stacked_expert::StackedExpertGgufLinear;
pub mod marlin_expert_stack;
pub use marlin_expert_stack::MarlinExpertStack;
pub mod quant_linear;
pub mod attention;
pub mod moe_host;
#[cfg(all(target_os = "macos", feature = "metal"))]
pub use backend::metal::{
moe_post_ops, moe_post_ops_batched, moe_router, q4_k, q4_k_gemm, q4_k_gemv, q4_k_gemv_v2,
q4_k_moe_id_gate_up_silu, q4_k_moe_id_gate_up_silu_batched, q4_k_moe_id_gemm, q4_k_moe_id_gemv,
q4_k_moe_id_gemv_batched, q6_k_gemm, q6_k_gemv, q6_k_moe_id_gemm, q6_k_moe_id_gemv,
q6_k_moe_id_gemv_batched,
};
#[cfg(feature = "cuda")]
pub(crate) mod ptx {
#![allow(dead_code)]
include!(concat!(env!("OUT_DIR"), "/ptx.rs"));
}
#[cfg(feature = "cuda")]
pub mod int8_kv;
#[cfg(feature = "cuda")]
pub mod quant;
#[cfg(feature = "cuda")]
pub use backend::cuda::{
cublas, cuda_decode, cuda_graph, decode_buffers, gpu_paged_kv, marlin, nccl_comm, tp_decode,
weight_store,
};
#[cfg(feature = "cuda")]
pub use backend::cuda::decode_attention::decode_attention;
#[cfg(feature = "cuda")]
pub use backend::cuda::fused_add_rms_norm::fused_add_rms_norm;
#[cfg(feature = "cuda")]
pub use backend::cuda::fused_silu_mul::fused_silu_mul;
#[cfg(feature = "cuda")]
pub use backend::cuda::residual_add::residual_add;
#[cfg(feature = "cuda")]
pub use backend::cuda::rms_norm::rms_norm;
#[cfg(feature = "cuda")]
pub use backend::cuda::rope::rope;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub(crate) use backend::cuda::{triton_meta, triton_ptx};
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_add_bias::add_bias_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_fused_add_rms_norm::fused_add_rms_norm_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_fused_silu_mul::fused_silu_mul_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_gelu::gelu_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_layer_norm::layer_norm_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_residual_add::residual_add_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_residual_add_inplace::residual_add_inplace_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_rms_norm::rms_norm_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::triton_softmax::softmax_triton;
#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
pub use backend::cuda::{triton_fused_moe, triton_w4a16};
#[cfg(feature = "vllm-marlin")]
pub use backend::cuda::vllm_marlin;