//! Llama model implementations
//!
//! This module contains:
//! - `BitLlama` - Low-level model with 1.58-bit quantization support
//! - `Llama` - High-level API with tokenizer and state management (requires tokenizers feature)
pub use BitLlama;
pub use Llama;
/// Epsilon for RMSNorm.
///
/// Numerical-stability constant — presumably added to the mean-square term
/// before the reciprocal square root to avoid division by zero; confirm at
/// the normalization call site. `1e-5` matches the common Llama-family
/// default (`rms_norm_eps` in HF configs).
pub const RMS_NORM_EPS: f64 = 1e-5;
/// Minimum temperature for sampling.
///
/// Floor applied to the user-supplied temperature — presumably clamping it
/// before logits are scaled by `1.0 / temperature`, so a temperature of 0
/// cannot cause a division by zero; confirm at the sampling call site.
pub const TEMP_MIN: f64 = 1e-6;