// realizar 0.8.4
// Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors

//! Cached model wrappers for efficient GPU inference
//!
//! This module contains:
//! - `single.rs`: OwnedQuantizedModelCached (RefCell-based, single-threaded)
//! - `sync.rs`: OwnedQuantizedModelCachedSync (Mutex-based, thread-safe)
//! - `weights.rs`: DequantizedWeightCache for GPU GEMM

// Private submodules. Their public types are re-exported at the bottom of
// this file, so callers import from this module rather than from the
// individual submodule paths.
//
// Per the module header: `single` holds the RefCell-based, single-threaded
// wrapper; `sync` holds the Mutex-based, thread-safe wrapper; `weights` holds
// the dequantized weight cache used for GPU GEMM.
mod single;
mod sync;
mod weights;

// Compiled only in test builds (`cfg(test)`).
// NOTE(review): presumably the unit tests for `sync` — confirm against the
// contents of the `sync_tests` module.
#[cfg(test)]
mod sync_tests;

// Public surface of this module. The submodules above are not `pub`, so code
// outside this module reaches these types through the re-exports below.
pub use single::OwnedQuantizedModelCached;
pub use sync::OwnedQuantizedModelCachedSync;
pub use weights::{DequantizedFFNWeights, DequantizedWeightCache};