alith_models/local_model/gguf/tools/
gguf_file.rs

//! Support for the GGUF file format.
//!
//! Spec: https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md
//! Adapted from: https://github.com/huggingface/candle/blob/main/candle-core/src/quantized/gguf_file.rs
5
6use super::gguf_tensors::{GgmlDType, TensorInfo};
7use byteorder::{LittleEndian, ReadBytesExt};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
/// Alignment, in bytes, used to locate the tensor-data section when the
/// metadata holds no usable `general.alignment` entry.
pub const DEFAULT_ALIGNMENT: u32 = 32;
12
13pub struct GgufFile {
14    pub magic: VersionedMagic,
15    pub metadata: HashMap<String, Value>,
16    pub tensors: Vec<TensorInfo>,
17    pub tensor_data_offset: u64,
18}
19
/// File-format family identified by the leading magic number.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Magic {
    /// The GGUF container format.
    Gguf,
}
24
25impl TryFrom<u32> for Magic {
26    type Error = crate::Error;
27    fn try_from(value: u32) -> crate::Result<Self> {
28        let magic = match value {
29            0x46554747 | 0x47475546 => Self::Gguf,
30            _ => crate::bail!("unknown magic 0x{value:08x}"),
31        };
32        Ok(magic)
33    }
34}
35
/// GGUF magic combined with the format version declared in the header.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum VersionedMagic {
    /// GGUF, format version 1.
    GgufV1,
    /// GGUF, format version 2.
    GgufV2,
    /// GGUF, format version 3.
    GgufV3,
}
42
43impl VersionedMagic {
44    fn read<R: std::io::Read>(reader: &mut R) -> crate::Result<Self> {
45        let magic = reader.read_u32::<LittleEndian>()?;
46        let magic = Magic::try_from(magic)?;
47        let version = reader.read_u32::<LittleEndian>()?;
48        let versioned_magic = match (magic, version) {
49            (Magic::Gguf, 1) => Self::GgufV1,
50            (Magic::Gguf, 2) => Self::GgufV2,
51            (Magic::Gguf, 3) => Self::GgufV3,
52            _ => crate::bail!("gguf: unsupported magic/version {magic:?}/{version}"),
53        };
54        Ok(versioned_magic)
55    }
56}
57
58impl GgufFile {
59    pub fn read<R: std::io::Seek + std::io::Read>(reader: &mut R) -> crate::Result<Self> {
60        let magic = VersionedMagic::read(reader)?;
61
62        let tensor_count = match magic {
63            VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
64            VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
65                reader.read_u64::<LittleEndian>()? as usize
66            }
67        };
68        let metadata_kv_count = match magic {
69            VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
70            VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
71                reader.read_u64::<LittleEndian>()? as usize
72            }
73        };
74
75        let mut metadata = HashMap::new();
76        for _idx in 0..metadata_kv_count {
77            let key = read_string(reader, &magic)?;
78            let value_type = reader.read_u32::<LittleEndian>()?;
79            let value_type = ValueType::from_u32(value_type)?;
80            let value = Value::read(reader, value_type, &magic)?;
81            metadata.insert(key, value);
82        }
83
84        let mut tensor_infos = vec![];
85        for _idx in 0..tensor_count {
86            let tensor_name = read_string(reader, &magic)?;
87            let n_dimensions = reader.read_u32::<LittleEndian>()?;
88
89            let mut dimensions: Vec<usize> = match magic {
90                VersionedMagic::GgufV1 => {
91                    let mut dimensions = vec![0; n_dimensions as usize];
92                    reader.read_u32_into::<LittleEndian>(&mut dimensions)?;
93                    dimensions.into_iter().map(|c| c as usize).collect()
94                }
95                VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
96                    let mut dimensions = vec![0; n_dimensions as usize];
97                    reader.read_u64_into::<LittleEndian>(&mut dimensions)?;
98                    dimensions.into_iter().map(|c| c as usize).collect()
99                }
100            };
101            dimensions.reverse();
102
103            let ggml_dtype = reader.read_u32::<LittleEndian>()?;
104            let ggml_dtype = GgmlDType::from_u32(ggml_dtype)?;
105
106            let offset = reader.read_u64::<LittleEndian>()?;
107            tensor_infos.push(TensorInfo {
108                name: tensor_name,
109                shape: dimensions,
110                offset,
111                ggml_dtype,
112            });
113        }
114        let position = reader.stream_position()?;
115        let alignment = match metadata.get("general.alignment") {
116            Some(Value::U8(v)) => *v as u32,
117            Some(Value::U16(v)) => *v as u32,
118            Some(Value::U32(v)) => *v,
119            Some(Value::I8(v)) if *v >= 0 => *v as u32,
120            Some(Value::I16(v)) if *v >= 0 => *v as u32,
121            Some(Value::I32(v)) if *v >= 0 => *v as u32,
122            _ => DEFAULT_ALIGNMENT,
123        };
124        metadata.insert("general.alignment".to_string(), Value::U32(alignment));
125        let alignment = alignment as u64;
126        let tensor_data_offset = position.div_ceil(alignment) * alignment;
127        Ok(Self {
128            magic,
129            metadata,
130            tensors: tensor_infos,
131            tensor_data_offset,
132        })
133    }
134
135    pub fn get_value<T: FromValue>(&self, key: &str) -> crate::Result<T> {
136        match self.metadata.get(key) {
137            Some(value) => T::from_value(value),
138            None => T::from_none(key),
139        }
140    }
141
142    pub fn get_pathed_value<T: FromValue>(
143        &self,
144        path_prefixes: &[&str],
145        field_name: &str,
146    ) -> crate::Result<T> {
147        let prop_key = if path_prefixes.is_empty() {
148            field_name.to_string()
149        } else {
150            let prefix = path_prefixes.join(".");
151            format!("{}.{}", prefix, field_name)
152        };
153        self.get_value(&prop_key)
154    }
155
156    pub fn size(&self) -> u64 {
157        self.tensors.iter().map(|t| t.size()).sum()
158    }
159}
160
161fn read_string<R: std::io::Read>(reader: &mut R, magic: &VersionedMagic) -> crate::Result<String> {
162    let len = match magic {
163        VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
164        VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
165            reader.read_u64::<LittleEndian>()? as usize
166        }
167    };
168    let mut v = vec![0u8; len];
169    reader.read_exact(&mut v)?;
170    // GGUF strings are supposed to be non-null terminated but in practice this happens.
171    while let Some(0) = v.last() {
172        v.pop();
173    }
174    // GGUF strings are utf8 encoded but there are cases that don't seem to be valid.
175    Ok(String::from_utf8_lossy(&v).into_owned())
176}
177
/// Type tag of a GGUF metadata value.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ValueType {
    /// 8-bit unsigned integer.
    U8,
    /// 8-bit signed integer.
    I8,
    /// 16-bit unsigned little-endian integer.
    U16,
    /// 16-bit signed little-endian integer.
    I16,
    /// 32-bit unsigned little-endian integer.
    U32,
    /// 32-bit signed little-endian integer.
    I32,
    /// 64-bit unsigned little-endian integer.
    U64,
    /// 64-bit signed little-endian integer.
    I64,
    /// 32-bit IEEE754 floating point number.
    F32,
    /// 64-bit IEEE754 floating point number.
    F64,
    /// Boolean stored as one byte, where 0 is false and 1 is true.
    ///
    /// Anything else is invalid, and should be treated as either the model
    /// being invalid or the reader being buggy.
    Bool,
    /// UTF-8 string without a NUL terminator, with its length prepended.
    String,
    /// Array of other values, with the element type and length prepended.
    ///
    /// Arrays can be nested, and the length is the number of elements, not
    /// the number of bytes.
    Array,
}
210
211impl ValueType {
212    fn from_u32(v: u32) -> crate::Result<Self> {
213        let v = match v {
214            0 => Self::U8,
215            1 => Self::I8,
216            2 => Self::U16,
217            3 => Self::I16,
218            4 => Self::U32,
219            5 => Self::I32,
220            6 => Self::F32,
221            7 => Self::Bool,
222            8 => Self::String,
223            9 => Self::Array,
224            10 => Self::U64,
225            11 => Self::I64,
226            12 => Self::F64,
227            v => {
228                let bytes = v.to_le_bytes();
229                let as_le = u32::from_le_bytes(bytes);
230                let as_be = u32::from_be_bytes(bytes);
231                let ascii_le = String::from_utf8_lossy(&bytes).to_string();
232                let ascii_be =
233                    String::from_utf8_lossy(&bytes.iter().rev().cloned().collect::<Vec<u8>>())
234                        .to_string();
235
236                crate::bail!(format!(
237                    "Unrecognized value-type: {v} (0x{v:08x})\n\
238                    As little-endian: {as_le} (0x{as_le:08x})\n\
239                    As big-endian: {as_be} (0x{as_be:08x})\n\
240                    ASCII (LE): {ascii_le}\n\
241                    ASCII (BE): {ascii_be}"
242                ))
243            }
244        };
245        Ok(v)
246    }
247}
248
249#[derive(Debug, Clone, Serialize, Deserialize)]
250pub enum Value {
251    U8(u8),
252    I8(i8),
253    U16(u16),
254    I16(i16),
255    U32(u32),
256    I32(i32),
257    U64(u64),
258    I64(i64),
259    F32(f32),
260    F64(f64),
261    Bool(bool),
262    String(String),
263    Array(Vec<Value>),
264}
265
266impl Value {
267    pub fn value_type(&self) -> ValueType {
268        match self {
269            Self::U8(_) => ValueType::U8,
270            Self::I8(_) => ValueType::I8,
271            Self::U16(_) => ValueType::U16,
272            Self::I16(_) => ValueType::I16,
273            Self::U32(_) => ValueType::U32,
274            Self::I32(_) => ValueType::I32,
275            Self::U64(_) => ValueType::U64,
276            Self::I64(_) => ValueType::I64,
277            Self::F32(_) => ValueType::F32,
278            Self::F64(_) => ValueType::F64,
279            Self::Bool(_) => ValueType::Bool,
280            Self::String(_) => ValueType::String,
281            Self::Array(_) => ValueType::Array,
282        }
283    }
284
285    pub fn to_u8(&self) -> crate::Result<u8> {
286        match self {
287            Self::U8(v) => Ok(*v),
288            v => crate::bail!("not a u8 {v:?}"),
289        }
290    }
291
292    pub fn to_i8(&self) -> crate::Result<i8> {
293        match self {
294            Self::I8(v) => Ok(*v),
295            v => crate::bail!("not a i8 {v:?}"),
296        }
297    }
298
299    pub fn to_u16(&self) -> crate::Result<u16> {
300        match self {
301            Self::U16(v) => Ok(*v),
302            v => crate::bail!("not a u16 {v:?}"),
303        }
304    }
305
306    pub fn to_i16(&self) -> crate::Result<i16> {
307        match self {
308            Self::I16(v) => Ok(*v),
309            v => crate::bail!("not a i16 {v:?}"),
310        }
311    }
312
313    pub fn to_u32(&self) -> crate::Result<u32> {
314        match self {
315            Self::U32(v) => Ok(*v),
316            v => crate::bail!("not a u32 {v:?}"),
317        }
318    }
319
320    pub fn to_i32(&self) -> crate::Result<i32> {
321        match self {
322            Self::I32(v) => Ok(*v),
323            v => crate::bail!("not a i32 {v:?}"),
324        }
325    }
326
327    /// This will also automatically upcast any integral types which will not truncate.
328    pub fn to_u64(&self) -> crate::Result<u64> {
329        match self {
330            Self::U64(v) => Ok(*v),
331            // Autoupcast cases here
332            Self::U8(v) => Ok(*v as u64),
333            Self::U16(v) => Ok(*v as u64),
334            Self::U32(v) => Ok(*v as u64),
335            Self::Bool(v) => Ok(*v as u64),
336            v => crate::bail!("not a u64 or upcastable to u64 {v:?}"),
337        }
338    }
339
340    pub fn to_i64(&self) -> crate::Result<i64> {
341        match self {
342            Self::I64(v) => Ok(*v),
343            v => crate::bail!("not a i64 {v:?}"),
344        }
345    }
346
347    pub fn to_f32(&self) -> crate::Result<f32> {
348        match self {
349            Self::F32(v) => Ok(*v),
350            v => crate::bail!("not a f32 {v:?}"),
351        }
352    }
353
354    pub fn to_f64(&self) -> crate::Result<f64> {
355        match self {
356            Self::F64(v) => Ok(*v),
357            v => crate::bail!("not a f64 {v:?}"),
358        }
359    }
360
361    pub fn to_bool(&self) -> crate::Result<bool> {
362        match self {
363            Self::Bool(v) => Ok(*v),
364            v => crate::bail!("not a bool {v:?}"),
365        }
366    }
367
368    pub fn to_vec(&self) -> crate::Result<&Vec<Value>> {
369        match self {
370            Self::Array(v) => Ok(v),
371            v => crate::bail!("not a vec {v:?}"),
372        }
373    }
374
375    pub fn to_string(&self) -> crate::Result<&String> {
376        match self {
377            Self::String(v) => Ok(v),
378            v => crate::bail!("not a string {v:?}"),
379        }
380    }
381
382    fn read<R: std::io::Read>(
383        reader: &mut R,
384        value_type: ValueType,
385        magic: &VersionedMagic,
386    ) -> crate::Result<Self> {
387        let v = match value_type {
388            ValueType::U8 => Self::U8(reader.read_u8()?),
389            ValueType::I8 => Self::I8(reader.read_i8()?),
390            ValueType::U16 => Self::U16(reader.read_u16::<LittleEndian>()?),
391            ValueType::I16 => Self::I16(reader.read_i16::<LittleEndian>()?),
392            ValueType::U32 => Self::U32(reader.read_u32::<LittleEndian>()?),
393            ValueType::I32 => Self::I32(reader.read_i32::<LittleEndian>()?),
394            ValueType::U64 => Self::U64(reader.read_u64::<LittleEndian>()?),
395            ValueType::I64 => Self::I64(reader.read_i64::<LittleEndian>()?),
396            ValueType::F32 => Self::F32(reader.read_f32::<LittleEndian>()?),
397            ValueType::F64 => Self::F64(reader.read_f64::<LittleEndian>()?),
398            ValueType::Bool => match reader.read_u8()? {
399                0 => Self::Bool(false),
400                1 => Self::Bool(true),
401                b => crate::bail!("unexpected bool value {b}"),
402            },
403            ValueType::String => Self::String(read_string(reader, magic)?),
404            ValueType::Array => {
405                let value_type = reader.read_u32::<LittleEndian>()?;
406                let value_type = ValueType::from_u32(value_type)?;
407                let len = match magic {
408                    VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
409                    VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
410                        reader.read_u64::<LittleEndian>()? as usize
411                    }
412                };
413                let mut vs = Vec::with_capacity(len);
414                for _ in 0..len {
415                    vs.push(Value::read(reader, value_type, magic)?)
416                }
417                Self::Array(vs)
418            }
419        };
420        Ok(v)
421    }
422}
423
424pub trait FromValue: Sized {
425    fn from_value(value: &Value) -> crate::Result<Self>;
426
427    fn from_none(key: &str) -> crate::Result<Self> {
428        crate::bail!("missing key {key}")
429    }
430}
431
432impl FromValue for String {
433    fn from_value(value: &Value) -> crate::Result<Self> {
434        value.to_string().cloned()
435    }
436}
437
438impl FromValue for u64 {
439    fn from_value(value: &Value) -> crate::Result<Self> {
440        value.to_u64()
441    }
442}
443
444impl FromValue for u32 {
445    fn from_value(value: &Value) -> crate::Result<Self> {
446        value.to_u32()
447    }
448}
449
450impl FromValue for f32 {
451    fn from_value(value: &Value) -> crate::Result<Self> {
452        value.to_f32()
453    }
454}
455
456impl FromValue for bool {
457    fn from_value(value: &Value) -> crate::Result<Self> {
458        value.to_bool()
459    }
460}
461
462impl<T: FromValue> FromValue for Vec<T> {
463    fn from_value(value: &Value) -> crate::Result<Self> {
464        match value {
465            Value::Array(arr) => arr.iter().map(T::from_value).collect(),
466            _ => crate::bail!("not an array"),
467        }
468    }
469}
470
471impl<T: FromValue> FromValue for Option<T> {
472    fn from_value(value: &Value) -> crate::Result<Self> {
473        Ok(Some(T::from_value(value)?))
474    }
475
476    fn from_none(_key: &str) -> crate::Result<Self> {
477        Ok(None)
478    }
479}