Skip to main content

ferrum_kernels/
lib.rs

1//! Ferrum unified compute kernels for high-performance inference.
2//!
3//! Provides the `Backend` trait and implementations for CUDA, Metal, and CPU.
4//! On CUDA builds, kernels are compiled to PTX during `cargo build` and loaded
5//! on demand at runtime.
6
7pub mod backend;
8
9pub mod linear;
10pub use linear::Linear;
11
// Crate-private `ptx` module, compiled only when the `cuda` feature is enabled.
// `include!` splices in the file `ptx.rs` located in Cargo's `OUT_DIR`, so that
// file must already exist when this crate is compiled; the path is resolved at
// compile time via `env!`/`concat!`, not at runtime.
// NOTE(review): presumably `ptx.rs` is emitted by this crate's build script and
// carries the compiled PTX mentioned in the crate-level docs — the generator is
// not visible from this file, confirm against build.rs.
12#[cfg(feature = "cuda")]
13pub(crate) mod ptx {
14    include!(concat!(env!("OUT_DIR"), "/ptx.rs"));
15}
16
17#[cfg(feature = "cuda")]
18mod fused_add_rms_norm;
19#[cfg(feature = "cuda")]
20pub use fused_add_rms_norm::fused_add_rms_norm;
21
22#[cfg(feature = "cuda")]
23mod fused_silu_mul;
24#[cfg(feature = "cuda")]
25pub use fused_silu_mul::fused_silu_mul;
26
27#[cfg(feature = "cuda")]
28mod rms_norm;
29#[cfg(feature = "cuda")]
30pub use rms_norm::rms_norm;
31
32#[cfg(feature = "cuda")]
33mod rope;
34#[cfg(feature = "cuda")]
35pub use rope::rope;
36
37#[cfg(feature = "cuda")]
38mod decode_attention;
39#[cfg(feature = "cuda")]
40pub use decode_attention::decode_attention;
41
42#[cfg(feature = "cuda")]
43mod residual_add;
44#[cfg(feature = "cuda")]
45pub use residual_add::residual_add;
46
47#[cfg(feature = "cuda")]
48pub mod cublas;
49
50#[cfg(feature = "cuda")]
51pub mod decode_buffers;
52
53#[cfg(feature = "cuda")]
54pub mod weight_store;
55
56#[cfg(feature = "cuda")]
57pub mod cuda_graph;
58
59#[cfg(feature = "cuda")]
60pub mod quant;
61
62#[cfg(feature = "cuda")]
63pub mod marlin;
64
65#[cfg(feature = "cuda")]
66pub mod gpu_paged_kv;
67
68#[cfg(feature = "cuda")]
69pub mod cuda_decode;
70
71#[cfg(feature = "cuda")]
72pub mod nccl_comm;
73
74#[cfg(feature = "cuda")]
75pub mod tp_decode;