ggml/
lib.rs

1//! `ggml` is a semi-idiomatic wrapper for the `ggml` C library.
2//!
3//! It exposes a subset of operations (currently used to implement the [llm](https://crates.io/crates/llm) library).
//! Note that it does not expose a fully-idiomatic safe Rust interface; operations that are potentially unsafe are marked as such.
5//!
6//! `ggml` operates on a computational graph; no values will be computed until [Context::graph_compute] is executed.
7//! All [Tensor]s are nodes in this computational graph, and values cannot be retrieved until computation is completed.
8#![deny(missing_docs)]
9
10use std::os::raw::{c_int, c_void};
11
12mod context;
13mod tensor;
14
15pub mod format;
16pub mod util;
17
18pub use context::Context;
19pub use tensor::Tensor;
20
21pub(crate) use ggml_sys as sys;
22
23#[cfg(test)]
24mod tests;
25
/// The type of a tensor element.
///
/// This is an alias for [Type]; the two names are interchangeable.
pub type ElementType = Type;
28
/// The format of the file containing the model.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ContainerType {
    /// Legacy format; the oldest ggml tensor file format.
    Ggml,
    /// Legacy format that introduces versioning. Newer than GGML, older than GGJT.
    Ggmf,
    /// [mmap](https://en.wikipedia.org/wiki/Mmap)-able format.
    Ggjt,
}

impl ContainerType {
    /// Does this container type support mmap?
    ///
    /// Only [ContainerType::Ggjt] does; both legacy formats return `false`.
    pub fn support_mmap(&self) -> bool {
        // Kept exhaustive on purpose: adding a variant must force a decision here.
        match *self {
            Self::Ggjt => true,
            Self::Ggml | Self::Ggmf => false,
        }
    }
}
49
/// Magic constant for `ggml` files (versioned, ggmf).
///
/// Equal to `0x67676d66`, i.e. the ASCII bytes `ggmf` read as a big-endian `u32`.
pub const FILE_MAGIC_GGMF: u32 = u32::from_be_bytes(*b"ggmf");
/// Magic constant for `ggml` files (versioned, ggjt).
///
/// Equal to `0x67676a74`, i.e. the ASCII bytes `ggjt` read as a big-endian `u32`.
pub const FILE_MAGIC_GGJT: u32 = u32::from_be_bytes(*b"ggjt");
/// Magic constant for `ggml` files (unversioned).
///
/// Equal to `0x67676d6c`, i.e. the ASCII bytes `ggml` read as a big-endian `u32`.
pub const FILE_MAGIC_UNVERSIONED: u32 = u32::from_be_bytes(*b"ggml");

/// The currently-supported format version for `ggml` files.
pub const FORMAT_VERSION: u32 = 1;
59
/// The size of a `ggml` object.
///
/// Re-exports the `GGML_OBJECT_SIZE` constant of the underlying `ggml_sys`
/// bindings (presumably a size in bytes; confirm against the bindings).
pub const OBJECT_SIZE: usize = sys::GGML_OBJECT_SIZE;
62
/// The type of a value in `ggml`.
///
/// The default is [Type::Q4_0].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum Type {
    /// 4-bit quantization (type 0).
    #[default]
    Q4_0,
    /// 4-bit quantization (type 1); used by GPTQ.
    Q4_1,
    /// 4-bit quantization (type 2).
    Q4_2,
    /// 5-bit quantization (type 0).
    Q5_0,
    /// 5-bit quantization (type 1).
    Q5_1,
    /// 8-bit quantization (type 0).
    Q8_0,
    /// 8-bit quantization (type 1).
    Q8_1,
    /// 32-bit integer.
    I32,
    /// 16-bit float.
    F16,
    /// 32-bit float.
    F32,
}
88impl From<Type> for sys::ggml_type {
89    fn from(t: Type) -> Self {
90        match t {
91            Type::Q4_0 => sys::ggml_type_GGML_TYPE_Q4_0,
92            Type::Q4_1 => sys::ggml_type_GGML_TYPE_Q4_1,
93            Type::Q4_2 => sys::ggml_type_GGML_TYPE_Q4_2,
94            Type::Q5_0 => sys::ggml_type_GGML_TYPE_Q5_0,
95            Type::Q5_1 => sys::ggml_type_GGML_TYPE_Q5_1,
96            Type::Q8_0 => sys::ggml_type_GGML_TYPE_Q8_0,
97            Type::Q8_1 => sys::ggml_type_GGML_TYPE_Q8_1,
98            Type::I32 => sys::ggml_type_GGML_TYPE_I32,
99            Type::F16 => sys::ggml_type_GGML_TYPE_F16,
100            Type::F32 => sys::ggml_type_GGML_TYPE_F32,
101        }
102    }
103}
104impl TryFrom<sys::ggml_type> for Type {
105    type Error = ();
106    fn try_from(t: sys::ggml_type) -> Result<Self, Self::Error> {
107        match t {
108            sys::ggml_type_GGML_TYPE_Q4_0 => Ok(Type::Q4_0),
109            sys::ggml_type_GGML_TYPE_Q4_1 => Ok(Type::Q4_1),
110            sys::ggml_type_GGML_TYPE_Q4_2 => Ok(Type::Q4_2),
111            sys::ggml_type_GGML_TYPE_Q5_0 => Ok(Type::Q5_0),
112            sys::ggml_type_GGML_TYPE_Q5_1 => Ok(Type::Q5_1),
113            sys::ggml_type_GGML_TYPE_Q8_0 => Ok(Type::Q8_0),
114            sys::ggml_type_GGML_TYPE_Q8_1 => Ok(Type::Q8_1),
115            sys::ggml_type_GGML_TYPE_I32 => Ok(Type::I32),
116            sys::ggml_type_GGML_TYPE_F16 => Ok(Type::F16),
117            sys::ggml_type_GGML_TYPE_F32 => Ok(Type::F32),
118            _ => Err(()),
119        }
120    }
121}
122impl std::fmt::Display for Type {
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        match self {
125            Type::Q4_0 => write!(f, "q4_0"),
126            Type::Q4_1 => write!(f, "q4_1"),
127            Type::Q4_2 => write!(f, "q4_2"),
128            Type::Q5_0 => write!(f, "q5_0"),
129            Type::Q5_1 => write!(f, "q5_1"),
130            Type::Q8_0 => write!(f, "q8_0"),
131            Type::Q8_1 => write!(f, "q8_1"),
132            Type::I32 => write!(f, "i32"),
133            Type::F16 => write!(f, "f16"),
134            Type::F32 => write!(f, "f32"),
135        }
136    }
137}
138
/// A buffer of memory that can be used as a scratch buffer for a [Context].
///
/// See [Context::use_scratch].
pub struct Buffer {
    data: Box<[u8]>,
}

impl Buffer {
    /// Creates a new buffer of the specified size, in bytes.
    ///
    /// The contents are zero-initialized.
    pub fn new(size: usize) -> Self {
        // The buffer is deliberately zero-filled rather than created with
        // `Vec::with_capacity` + `set_len`: `set_len` requires the elements up
        // to the new length to be initialized, and handing out uninitialized
        // memory as `[u8]` is undefined behaviour. `vec![0u8; size]` asks the
        // allocator for already-zeroed memory, which is typically cheap even
        // for large buffers.
        Buffer {
            data: vec![0u8; size].into_boxed_slice(),
        }
    }
}
163
164/// A `ggml` computation graph. Keeps track of all state during computation.
165pub struct ComputationGraph {
166    inner: sys::ggml_cgraph,
167}
168
169impl ComputationGraph {
170    /// Create a new [ComputationGraph] with the specified `n_threads`.
171    pub fn new(n_threads: usize) -> Self {
172        Self {
173            inner: sys::ggml_cgraph {
174                n_threads: usize_to_i32(n_threads),
175                // SAFETY: This should be safe to zero. The original C++ impl
176                // just leaves it uninitialized
177                ..unsafe { std::mem::zeroed::<sys::ggml_cgraph>() }
178            },
179        }
180    }
181
182    /// Build this computational graph in the forward direction in preparation for computation.
183    pub fn build_forward_expand(&mut self, tensor: &Tensor) {
184        unsafe { sys::ggml_build_forward_expand(&mut self.inner, tensor.ptr.as_ptr()) }
185    }
186}
187
188/// The size of `t` as bytes.
189pub fn type_size(t: Type) -> usize {
190    unsafe { sys::ggml_type_size(t.into()) }
191}
192
193/// [type_size]/[blck_size] as float.
194pub fn type_sizef(x: Type) -> f64 {
195    (unsafe { sys::ggml_type_sizef(x.into()) }) as f64
196}
197
198/// The size of a block for `t`. Only relevant for quantized types.
199pub fn blck_size(t: Type) -> usize {
200    i32_to_usize(unsafe { sys::ggml_blck_size(t.into()) })
201}
202
/// Converts `val` to an `i32`.
///
/// # Panics
///
/// Panics if `val` is greater than `i32::MAX`; the message names the offending value.
fn usize_to_i32(val: usize) -> i32 {
    i32::try_from(val).unwrap_or_else(|_| panic!("usize value {} does not fit in i32", val))
}
206
/// Converts `val` to an `i64`.
///
/// # Panics
///
/// Panics if `val` is greater than `i64::MAX`; the message names the offending value.
fn usize_to_i64(val: usize) -> i64 {
    i64::try_from(val).unwrap_or_else(|_| panic!("usize value {} does not fit in i64", val))
}
210
/// Converts `val` to a `usize`.
///
/// # Panics
///
/// Panics if `val` is negative; the message names the offending value.
fn i32_to_usize(val: i32) -> usize {
    usize::try_from(val).unwrap_or_else(|_| panic!("i32 value {} does not fit in usize", val))
}
214
/// Converts `val` to a `usize`.
///
/// # Panics
///
/// Panics if `val` is negative or too large for `usize` on the current
/// target; the message names the offending value.
fn i64_to_usize(val: i64) -> usize {
    usize::try_from(val).unwrap_or_else(|_| panic!("i64 value {} does not fit in usize", val))
}
218
/// Contains the result of a quantization operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QuantizationResult {
    /// The quantized output, as raw bytes.
    pub output: Vec<u8>,
    /// The quantization history, as filled in by the underlying quantizer.
    pub history: Vec<i64>,
}
226
/// Quantizes `src` using `q4_0` quantization and returns the quantized bytes
/// together with the quantization history.
///
/// You must ensure that `src.len() == n_elements`, and `n_elements_0`
/// is the first dimension of `src`.
///
/// # Panics
///
/// Panics if `src.len() != n_elements`, or if `n_elements` is not a multiple
/// of `n_elements_0`.
pub fn quantize_q4_0(src: &[f32], n_elements: usize, n_elements_0: usize) -> QuantizationResult {
    quantize_impl(src, n_elements, n_elements_0, sys::ggml_quantize_q4_0)
}
234
/// Quantizes `src` using `q4_1` quantization and returns the quantized bytes
/// together with the quantization history.
///
/// You must ensure that `src.len() == n_elements`, and `n_elements_0`
/// is the first dimension of `src`.
///
/// # Panics
///
/// Panics if `src.len() != n_elements`, or if `n_elements` is not a multiple
/// of `n_elements_0`.
pub fn quantize_q4_1(src: &[f32], n_elements: usize, n_elements_0: usize) -> QuantizationResult {
    quantize_impl(src, n_elements, n_elements_0, sys::ggml_quantize_q4_1)
}
242
243fn quantize_impl(
244    src: &[f32],
245    n_elements: usize,
246    n_elements_0: usize,
247    quantizer: unsafe extern "C" fn(*const f32, *mut c_void, c_int, c_int, *mut i64) -> usize,
248) -> QuantizationResult {
249    assert_eq!(src.len(), n_elements);
250    assert_eq!(n_elements % n_elements_0, 0);
251
252    // A conservative multiplier of 4 is used here.
253    let mut output = vec![0u8; n_elements * 4];
254    let mut history = vec![0i64; 16];
255    let output_size = unsafe {
256        quantizer(
257            src.as_ptr(),
258            output.as_mut_ptr() as *mut c_void,
259            n_elements.try_into().unwrap(),
260            n_elements_0.try_into().unwrap(),
261            history.as_mut_ptr(),
262        )
263    };
264
265    output.resize(output_size, 0u8);
266    QuantizationResult { output, history }
267}