trueno/inference/mod.rs
1//! End-to-end LLM inference engine.
2//!
3//! Composes trueno's compute primitives (Q4K matmul, RMS norm, fused attention,
4//! SIMD softmax) into a complete transformer that loads GGUF models and generates text.
5//!
6//! # Example
7//!
8//! ```rust,ignore
9//! use trueno::inference::{GgufFile, LlamaModel, generate, SampleParams};
10//!
11//! let gguf = GgufFile::load(Path::new("model.gguf"))?;
12//! let model = LlamaModel::from_gguf(&gguf)?;
13//! let tokens = generate(&model, &[1], 100, &SampleParams::default(), 2)?;
14//! ```
15
/// Generation entry point; home of the `generate` function and `SampleParams`.
16pub mod generate;
/// GGUF model-file support; home of `GgufFile`.
17pub mod gguf;
/// Model types: `LlamaModel`, `ModelConfig`, `KvCache`, and `ForwardArena`.
18pub mod model;
19
// Re-export the main items so callers can import them directly from
// `trueno::inference`, as the module-level example does.
20pub use generate::{generate, SampleParams};
21pub use gguf::GgufFile;
22pub use model::{ForwardArena, KvCache, LlamaModel, ModelConfig};