1#![deny(missing_docs)]
9
10use std::os::raw::{c_int, c_void};
11
12mod context;
13mod tensor;
14
15pub mod format;
16pub mod util;
17
18pub use context::Context;
19pub use tensor::Tensor;
20
21pub(crate) use ggml_sys as sys;
22
23#[cfg(test)]
24mod tests;
25
/// Alias of [`Type`]; the two names are interchangeable.
pub type ElementType = Type;
28
/// Identifies the kind of container a file uses.
///
/// NOTE(review): the variant names line up with the `FILE_MAGIC_*` constants of this crate, but
/// the mapping between the two is not established in this file — confirm against the loader.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum ContainerType {
    /// The `ggml` container; reported as not supporting memory mapping.
    Ggml,
    /// The `ggmf` container; reported as not supporting memory mapping.
    Ggmf,
    /// The `ggjt` container; the only one reported as supporting memory mapping.
    Ggjt,
}
impl ContainerType {
    /// Returns `true` if this container type supports memory mapping.
    ///
    /// Only [`ContainerType::Ggjt`] does.
    pub fn support_mmap(&self) -> bool {
        // Kept exhaustive on purpose: adding a variant must force an explicit decision here.
        match *self {
            Self::Ggml | Self::Ggmf => false,
            Self::Ggjt => true,
        }
    }
}
49
/// Magic value whose bytes spell `ggmf` in ASCII (`0x67 0x67 0x6d 0x66`).
pub const FILE_MAGIC_GGMF: u32 = 0x67676d66;
/// Magic value whose bytes spell `ggjt` in ASCII (`0x67 0x67 0x6a 0x74`).
pub const FILE_MAGIC_GGJT: u32 = 0x67676a74;
/// Magic value for unversioned files; its bytes spell `ggml` in ASCII (`0x67 0x67 0x6d 0x6c`).
pub const FILE_MAGIC_UNVERSIONED: u32 = 0x67676d6c;

/// The format version number.
///
/// NOTE(review): presumably the version stored in versioned files — confirm against the loader.
pub const FORMAT_VERSION: u32 = 1;

/// Re-export of `ggml_sys::GGML_OBJECT_SIZE` under this crate's naming.
pub const OBJECT_SIZE: usize = sys::GGML_OBJECT_SIZE;
62
/// The element types this crate can name, each paired with one `ggml_type` constant of
/// `ggml_sys`.
///
/// A `Type` converts into the raw `ggml_type` via [`From`], back via [`TryFrom`], and is printed
/// in lowercase (for example `q4_0`) by its [`Display`](std::fmt::Display) implementation.
///
/// NOTE(review): the `Q*` names presumably denote quantized formats and `I32`/`F16`/`F32` plain
/// integer/float formats — confirm against the `ggml` documentation.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Default)]
pub enum Type {
    /// Paired with `GGML_TYPE_Q4_0`; displayed as `q4_0`. This is the [`Default`] variant.
    #[default]
    Q4_0,
    /// Paired with `GGML_TYPE_Q4_1`; displayed as `q4_1`.
    Q4_1,
    /// Paired with `GGML_TYPE_Q4_2`; displayed as `q4_2`.
    Q4_2,
    /// Paired with `GGML_TYPE_Q5_0`; displayed as `q5_0`.
    Q5_0,
    /// Paired with `GGML_TYPE_Q5_1`; displayed as `q5_1`.
    Q5_1,
    /// Paired with `GGML_TYPE_Q8_0`; displayed as `q8_0`.
    Q8_0,
    /// Paired with `GGML_TYPE_Q8_1`; displayed as `q8_1`.
    Q8_1,
    /// Paired with `GGML_TYPE_I32`; displayed as `i32`.
    I32,
    /// Paired with `GGML_TYPE_F16`; displayed as `f16`.
    F16,
    /// Paired with `GGML_TYPE_F32`; displayed as `f32`.
    F32,
}
88impl From<Type> for sys::ggml_type {
89 fn from(t: Type) -> Self {
90 match t {
91 Type::Q4_0 => sys::ggml_type_GGML_TYPE_Q4_0,
92 Type::Q4_1 => sys::ggml_type_GGML_TYPE_Q4_1,
93 Type::Q4_2 => sys::ggml_type_GGML_TYPE_Q4_2,
94 Type::Q5_0 => sys::ggml_type_GGML_TYPE_Q5_0,
95 Type::Q5_1 => sys::ggml_type_GGML_TYPE_Q5_1,
96 Type::Q8_0 => sys::ggml_type_GGML_TYPE_Q8_0,
97 Type::Q8_1 => sys::ggml_type_GGML_TYPE_Q8_1,
98 Type::I32 => sys::ggml_type_GGML_TYPE_I32,
99 Type::F16 => sys::ggml_type_GGML_TYPE_F16,
100 Type::F32 => sys::ggml_type_GGML_TYPE_F32,
101 }
102 }
103}
104impl TryFrom<sys::ggml_type> for Type {
105 type Error = ();
106 fn try_from(t: sys::ggml_type) -> Result<Self, Self::Error> {
107 match t {
108 sys::ggml_type_GGML_TYPE_Q4_0 => Ok(Type::Q4_0),
109 sys::ggml_type_GGML_TYPE_Q4_1 => Ok(Type::Q4_1),
110 sys::ggml_type_GGML_TYPE_Q4_2 => Ok(Type::Q4_2),
111 sys::ggml_type_GGML_TYPE_Q5_0 => Ok(Type::Q5_0),
112 sys::ggml_type_GGML_TYPE_Q5_1 => Ok(Type::Q5_1),
113 sys::ggml_type_GGML_TYPE_Q8_0 => Ok(Type::Q8_0),
114 sys::ggml_type_GGML_TYPE_Q8_1 => Ok(Type::Q8_1),
115 sys::ggml_type_GGML_TYPE_I32 => Ok(Type::I32),
116 sys::ggml_type_GGML_TYPE_F16 => Ok(Type::F16),
117 sys::ggml_type_GGML_TYPE_F32 => Ok(Type::F32),
118 _ => Err(()),
119 }
120 }
121}
122impl std::fmt::Display for Type {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 match self {
125 Type::Q4_0 => write!(f, "q4_0"),
126 Type::Q4_1 => write!(f, "q4_1"),
127 Type::Q4_2 => write!(f, "q4_2"),
128 Type::Q5_0 => write!(f, "q5_0"),
129 Type::Q5_1 => write!(f, "q5_1"),
130 Type::Q8_0 => write!(f, "q8_0"),
131 Type::Q8_1 => write!(f, "q8_1"),
132 Type::I32 => write!(f, "i32"),
133 Type::F16 => write!(f, "f16"),
134 Type::F32 => write!(f, "f32"),
135 }
136 }
137}
138
/// A fixed-size, heap-allocated byte buffer.
///
/// NOTE(review): nothing in this file reads the buffer; presumably it is handed to `ggml` as
/// working memory elsewhere in the crate — confirm against the callers.
pub struct Buffer {
    /// Backing storage; its length is fixed when the buffer is created.
    data: Box<[u8]>,
}

impl Buffer {
    /// Creates a buffer of `size` bytes, all initialised to zero.
    ///
    /// The bytes are zeroed rather than left uninitialised. Exposing uninitialised memory
    /// through a `Box<[u8]>` is undefined behaviour even if it is never read from Rust, and a
    /// zeroed allocation costs little because the allocator can request pre-zeroed pages.
    pub fn new(size: usize) -> Self {
        Buffer {
            data: vec![0u8; size].into_boxed_slice(),
        }
    }
}
163
164pub struct ComputationGraph {
166 inner: sys::ggml_cgraph,
167}
168
169impl ComputationGraph {
170 pub fn new(n_threads: usize) -> Self {
172 Self {
173 inner: sys::ggml_cgraph {
174 n_threads: usize_to_i32(n_threads),
175 ..unsafe { std::mem::zeroed::<sys::ggml_cgraph>() }
178 },
179 }
180 }
181
182 pub fn build_forward_expand(&mut self, tensor: &Tensor) {
184 unsafe { sys::ggml_build_forward_expand(&mut self.inner, tensor.ptr.as_ptr()) }
185 }
186}
187
188pub fn type_size(t: Type) -> usize {
190 unsafe { sys::ggml_type_size(t.into()) }
191}
192
193pub fn type_sizef(x: Type) -> f64 {
195 (unsafe { sys::ggml_type_sizef(x.into()) }) as f64
196}
197
198pub fn blck_size(t: Type) -> usize {
200 i32_to_usize(unsafe { sys::ggml_blck_size(t.into()) })
201}
202
/// Converts `val` to an `i32`.
///
/// # Panics
///
/// Panics if `val` is greater than `i32::MAX`.
fn usize_to_i32(val: usize) -> i32 {
    let narrowed: Result<i32, _> = val.try_into();
    narrowed.unwrap()
}
206
/// Converts `val` to an `i64`.
///
/// # Panics
///
/// Panics if `val` is greater than `i64::MAX`.
fn usize_to_i64(val: usize) -> i64 {
    let widened: Result<i64, _> = val.try_into();
    widened.unwrap()
}
210
/// Converts `val` to a `usize`.
///
/// # Panics
///
/// Panics if `val` is negative.
fn i32_to_usize(val: i32) -> usize {
    let unsigned: Result<usize, _> = val.try_into();
    unsigned.unwrap()
}
214
/// Converts `val` to a `usize`.
///
/// # Panics
///
/// Panics if `val` is negative or larger than `usize::MAX`.
fn i64_to_usize(val: i64) -> usize {
    let unsigned: Result<usize, _> = val.try_into();
    unsigned.unwrap()
}
218
/// The outcome of a quantization call such as [`quantize_q4_0`] or [`quantize_q4_1`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QuantizationResult {
    /// The quantized bytes, sized to the length reported by the quantizer.
    pub output: Vec<u8>,
    /// The 16 `i64` slots filled in by the quantizer.
    ///
    /// NOTE(review): presumably a histogram of the quantized values — confirm against `ggml`.
    pub history: Vec<i64>,
}
226
227pub fn quantize_q4_0(src: &[f32], n_elements: usize, n_elements_0: usize) -> QuantizationResult {
232 quantize_impl(src, n_elements, n_elements_0, sys::ggml_quantize_q4_0)
233}
234
235pub fn quantize_q4_1(src: &[f32], n_elements: usize, n_elements_0: usize) -> QuantizationResult {
240 quantize_impl(src, n_elements, n_elements_0, sys::ggml_quantize_q4_1)
241}
242
243fn quantize_impl(
244 src: &[f32],
245 n_elements: usize,
246 n_elements_0: usize,
247 quantizer: unsafe extern "C" fn(*const f32, *mut c_void, c_int, c_int, *mut i64) -> usize,
248) -> QuantizationResult {
249 assert_eq!(src.len(), n_elements);
250 assert_eq!(n_elements % n_elements_0, 0);
251
252 let mut output = vec![0u8; n_elements * 4];
254 let mut history = vec![0i64; 16];
255 let output_size = unsafe {
256 quantizer(
257 src.as_ptr(),
258 output.as_mut_ptr() as *mut c_void,
259 n_elements.try_into().unwrap(),
260 n_elements_0.try_into().unwrap(),
261 history.as_mut_ptr(),
262 )
263 };
264
265 output.resize(output_size, 0u8);
266 QuantizationResult { output, history }
267}