1pub fn configure_native_profile_sink(
8 config: &ferrum_bench_core::ProfileSinkConfig,
9) -> std::io::Result<()> {
10 #[cfg(all(feature = "cuda", feature = "vllm-moe-marlin"))]
11 backend::cuda::marlin::configure_vllm_moe_profile_sink(config)?;
12 #[cfg(not(all(feature = "cuda", feature = "vllm-moe-marlin")))]
13 let _ = config;
14 Ok(())
15}
16
17pub mod backend;
18
19pub mod linear;
20pub use linear::Linear;
21
22pub mod stacked_expert;
23pub use stacked_expert::StackedExpertGgufLinear;
24
25pub mod marlin_expert_stack;
26pub use marlin_expert_stack::MarlinExpertStack;
27
28pub mod quant_linear;
29
30pub mod attention;
31
32pub mod moe_host;
33
34#[cfg(all(target_os = "macos", feature = "metal"))]
40pub use backend::metal::{
41 moe_post_ops, moe_post_ops_batched, moe_router, q4_k, q4_k_gemm, q4_k_gemv, q4_k_gemv_v2,
42 q4_k_moe_id_gate_up_silu, q4_k_moe_id_gate_up_silu_batched, q4_k_moe_id_gemm, q4_k_moe_id_gemv,
43 q4_k_moe_id_gemv_batched, q6_k_gemm, q6_k_gemv, q6_k_moe_id_gemm, q6_k_moe_id_gemv,
44 q6_k_moe_id_gemv_batched,
45};
46
47#[cfg(feature = "cuda")]
48pub(crate) mod ptx {
49 #![allow(dead_code)]
54 include!(concat!(env!("OUT_DIR"), "/ptx.rs"));
55}
56
57#[cfg(feature = "cuda")]
68pub mod int8_kv;
69#[cfg(feature = "cuda")]
70pub mod quant;
71
72#[cfg(feature = "cuda")]
73pub use backend::cuda::{
74 cublas, cuda_decode, cuda_graph, decode_buffers, gpu_paged_kv, marlin, nccl_comm, tp_decode,
75 weight_store,
76};
77
78#[cfg(feature = "cuda")]
79pub use backend::cuda::decode_attention::decode_attention;
80#[cfg(feature = "cuda")]
81pub use backend::cuda::fused_add_rms_norm::fused_add_rms_norm;
82#[cfg(feature = "cuda")]
83pub use backend::cuda::fused_silu_mul::fused_silu_mul;
84#[cfg(feature = "cuda")]
85pub use backend::cuda::residual_add::residual_add;
86#[cfg(feature = "cuda")]
87pub use backend::cuda::rms_norm::rms_norm;
88#[cfg(feature = "cuda")]
89pub use backend::cuda::rope::rope;
90
91#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
95pub(crate) use backend::cuda::{triton_meta, triton_ptx};
96
97#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
98pub use backend::cuda::triton_add_bias::add_bias_triton;
99#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
100pub use backend::cuda::triton_fused_add_rms_norm::fused_add_rms_norm_triton;
101#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
102pub use backend::cuda::triton_fused_silu_mul::fused_silu_mul_triton;
103#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
104pub use backend::cuda::triton_gelu::gelu_triton;
105#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
106pub use backend::cuda::triton_layer_norm::layer_norm_triton;
107#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
108pub use backend::cuda::triton_residual_add::residual_add_triton;
109#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
110pub use backend::cuda::triton_residual_add_inplace::residual_add_inplace_triton;
111#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
112pub use backend::cuda::triton_rms_norm::rms_norm_triton;
113#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
114pub use backend::cuda::triton_softmax::softmax_triton;
115#[cfg(all(feature = "cuda", feature = "triton-kernels"))]
116pub use backend::cuda::{triton_fused_moe, triton_w4a16};
117
118#[cfg(feature = "vllm-marlin")]
121pub use backend::cuda::vllm_marlin;