realizar 0.8.5

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
//! Extracted layer operations for `CudaExecutor`.
//!
//! Split from `layer.rs` (PMAT-802) to reduce module size while maintaining
//! performance through `#[inline(always)]` on critical paths.

// Submodule declarations. Each is declared out of line (`mod name;`), so its
// body lives in a separate source file, and none is `pub`, so the submodules
// themselves are private to this module.
// NOTE(review): the submodule names suggest one group of layer operations per
// file, but their contents are not visible from here — confirm before relying
// on that.
mod batched;
mod cublas_prefill;
mod ffn;
mod forward;
mod graph_decode;
mod graphed;
mod indexed;
mod manual_graph;
mod prefill;

// Public re-export: these two items from the private `ffn` submodule are the
// only names this file exposes via `pub use`, so callers can import them from
// this module's path without naming `ffn` directly.
pub use ffn::{fused_ffn_swiglu_gpu, fused_ffn_swiglu_gpu_true_dp4a};