hot_loop/lib.rs
//! # High-Level Pure-Rust Crate for Running GGUF Chat Models on the Candle Backend
//!
//! ---
//!
//! ## Easy to use:
//! ```rust,no_run
//! use std::fs::{File, read};
//! use std::io::{stdout, Write};
//!
//! use hot_loop::{
//!     Model,
//!     models::Qwen3,
//!     Device,
//!     Error,
//! };
//!
//! fn main() -> Result<(), Error> {
//!     let mut model_file = File::open("models/Qwen3-4B-it-Q4_K_M.gguf").unwrap();
//!     let tokenizer_bytes = read("models/tokenizer.json").unwrap();
//!
//!     // the model is read-only and shared by every session
//!     let model = Qwen3::load(&mut model_file, &tokenizer_bytes, &Device::Cpu)?;
//!
//!     let mut session = model.new_session();
//!     // and more sessions!
//!     // let mut session2 = model.new_session();
//!     // let mut session3 = model.new_session();
//!
//!     let mut generate = session.generate("Hello!")?;
//!
//!     // stream the reply chunk by chunk as it is generated
//!     while let Some(chunk) = generate.next_chunk()? {
//!         print!("{chunk}");
//!         stdout().flush().unwrap();
//!     }
//!
//!     Ok(())
//! }
//! ```
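//!
//! ## Choosing a device
//!
//! `Device` is re-exported from `candle_core` (see below), so the usual Candle
//! device constructors work here too. A minimal sketch, assuming a Candle build
//! with the matching backend enabled; pass the resulting device to
//! `Qwen3::load` exactly like `&Device::Cpu` above:
//! ```rust,no_run
//! use hot_loop::Device;
//!
//! // Pick GPU 0 when a CUDA device is available, otherwise fall back to the CPU.
//! let device = Device::cuda_if_available(0).unwrap();
//! // On Apple Silicon, `Device::new_metal(0)` selects the Metal backend instead.
//! ```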

#[cfg(feature = "mkl")]
extern crate intel_mkl_src;

#[cfg(feature = "accelerate")]
extern crate accelerate_src;
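
// The two feature gates above link an optional BLAS backend for faster CPU
// inference: Intel MKL via `intel-mkl-src` or Apple Accelerate via
// `accelerate-src`. A sketch of enabling one from a downstream Cargo.toml,
// assuming these are exposed as plain Cargo features of the same name:
//
//     [dependencies]
//     hot_loop = { version = "...", features = ["accelerate"] } # or "mkl"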

pub(crate) mod utils;
// Re-export Candle's `Device` so callers can select a backend without taking a
// direct dependency on `candle_core`.
pub use candle_core::Device;

// Model implementations (e.g. `Qwen3`) live here.
pub mod models;

pub(crate) mod core;
pub use crate::core::{
    model_weights::Model,
    error::Error,
    session,
    settings,
};
// Internal building blocks shared across the model implementations.
pub(crate) use crate::core::model_weights::{ModelWeights, KvCache, Role};