// hot_loop/lib.rs
//! # High-level pure-Rust crate for running GGUF chat models, using the Candle backend
//!
//! ---
//!
//! ## Easy to use:
//! ```no_run
//! use std::fs::{File, read};
//! use std::io::{stdout, Write};
//!
//! use hot_loop::{
//!     Model,
//!     models::Qwen3,
//!     Device,
//!     Error,
//! };
//!
//! fn main() -> Result<(), Error> {
//!     let mut model_file = File::open("models/Qwen3-4B-it-Q4_K_M.gguf").unwrap();
//!     let tokenizer_bytes = read("models/tokenizer.json").unwrap();
//!
//!     // The model itself is read-only.
//!     let model = Qwen3::load(&mut model_file, &tokenizer_bytes, &Device::Cpu)?;
//!
//!     let mut session = model.new_session();
//!     // ...and more sessions!
//!     // let mut session2 = model.new_session();
//!     // let mut session3 = model.new_session();
//!
//!     let mut generate = session.generate("Hello!")?;
//!
//!     while let Some(chunk) = generate.next_chunk()? {
//!         print!("{chunk}");
//!         stdout().flush().unwrap();
//!     }
//!
//!     Ok(())
//! }
//! ```

40#[cfg(feature = "mkl")]
41extern crate intel_mkl_src;
42
43#[cfg(feature = "accelerate")]
44extern crate accelerate_src;
45
46pub(crate) mod utils;
47pub use candle_core::Device;
48
49pub mod models;
50
51pub(crate) mod core;
52pub use core::{
53    model_weights::Model,
54    error::Error,
55    session,
56    settings,
57};
58pub(crate) use core::{
59    model_weights::{
60        ModelWeights, KvCache, Role
61    },
62};