Skip to main content

candle_core/quantized/
gguf_file.rs

1//! Support for the [GGUF file format](https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md).
2//!
3//! Spec: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md  
4
5use super::{GgmlDType, QTensor};
6use crate::{Context, Device, Result};
7use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
8use std::collections::HashMap;
9
/// Alignment, in bytes, of the start of the tensor data section when the metadata does not
/// provide a usable `general.alignment` entry.
pub const DEFAULT_ALIGNMENT: u64 = 32;
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13enum Magic {
14    Gguf,
15}
16
17impl TryFrom<u32> for Magic {
18    type Error = crate::Error;
19    fn try_from(value: u32) -> Result<Self> {
20        let magic = match value {
21            0x46554747 | 0x47475546 => Self::Gguf,
22            _ => crate::bail!("unknown magic 0x{value:08x}"),
23        };
24        Ok(magic)
25    }
26}
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq)]
29pub enum VersionedMagic {
30    GgufV1,
31    GgufV2,
32    GgufV3,
33}
34
35impl VersionedMagic {
36    fn read<R: std::io::Read>(reader: &mut R) -> Result<Self> {
37        let magic = reader.read_u32::<LittleEndian>()?;
38        let magic = Magic::try_from(magic)?;
39        let version = reader.read_u32::<LittleEndian>()?;
40        let versioned_magic = match (magic, version) {
41            (Magic::Gguf, 1) => Self::GgufV1,
42            (Magic::Gguf, 2) => Self::GgufV2,
43            (Magic::Gguf, 3) => Self::GgufV3,
44            _ => crate::bail!("gguf: unsupported magic/version {magic:?}/{version}"),
45        };
46        Ok(versioned_magic)
47    }
48}
49
50#[derive(Debug)]
51pub struct TensorInfo {
52    pub ggml_dtype: GgmlDType,
53    pub shape: crate::Shape,
54    pub offset: u64,
55}
56
57impl TensorInfo {
58    pub fn read<R: std::io::Seek + std::io::Read>(
59        &self,
60        reader: &mut R,
61        tensor_data_offset: u64,
62        device: &Device,
63    ) -> Result<QTensor> {
64        let tensor_elems = self.shape.elem_count();
65        let block_size = self.ggml_dtype.block_size();
66        if !tensor_elems.is_multiple_of(block_size) {
67            crate::bail!(
68            "the number of elements {tensor_elems} is not divisible by the block size {block_size}"
69        )
70        }
71        let size_in_bytes = tensor_elems / block_size * self.ggml_dtype.type_size();
72        let mut raw_data = vec![0u8; size_in_bytes];
73        reader.seek(std::io::SeekFrom::Start(tensor_data_offset + self.offset))?;
74        reader.read_exact(&mut raw_data)?;
75        super::ggml_file::qtensor_from_ggml(
76            self.ggml_dtype,
77            &raw_data,
78            self.shape.dims().to_vec(),
79            device,
80        )
81    }
82}
83
/// Everything read from the header of a GGUF file: version, metadata and tensor descriptions.
///
/// The tensor data itself is not loaded; it is read on demand from the positions recorded here.
#[derive(Debug)]
pub struct Content {
    /// Format version read from the start of the file.
    pub magic: VersionedMagic,
    /// Metadata entries, indexed by key.
    pub metadata: HashMap<String, Value>,
    /// Type, shape and data offset of each tensor, indexed by tensor name.
    pub tensor_infos: HashMap<String, TensorInfo>,
    /// Absolute position of the tensor data section: the end of the header rounded up to the
    /// alignment.
    pub tensor_data_offset: u64,
}
91
92fn read_string<R: std::io::Read>(reader: &mut R, magic: &VersionedMagic) -> Result<String> {
93    let len = match magic {
94        VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
95        VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
96            reader.read_u64::<LittleEndian>()? as usize
97        }
98    };
99    let mut v = vec![0u8; len];
100    reader.read_exact(&mut v)?;
101    // GGUF strings are supposed to be non-null terminated but in practice this happens.
102    while let Some(0) = v.last() {
103        v.pop();
104    }
105    // GGUF strings are utf8 encoded but there are cases that don't seem to be valid.
106    Ok(String::from_utf8_lossy(&v).into_owned())
107}
108
/// Type tag of a metadata value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueType {
    /// The value is an 8-bit unsigned integer.
    U8,
    /// The value is an 8-bit signed integer.
    I8,
    /// The value is a 16-bit unsigned little-endian integer.
    U16,
    /// The value is a 16-bit signed little-endian integer.
    I16,
    /// The value is a 32-bit unsigned little-endian integer.
    U32,
    /// The value is a 32-bit signed little-endian integer.
    I32,
    /// The value is a 64-bit unsigned little-endian integer.
    U64,
    /// The value is a 64-bit signed little-endian integer.
    I64,
    /// The value is a 32-bit IEEE754 floating point number.
    F32,
    /// The value is a 64-bit IEEE754 floating point number.
    F64,
    /// The value is a boolean.
    ///
    /// 1-byte value where 0 is false and 1 is true. Anything else is invalid, and should be
    /// treated as either the model being invalid or the reader being buggy.
    Bool,
    /// The value is a UTF-8 non-null-terminated string, with length prepended.
    String,
    /// The value is an array of other values, with the length and type prepended.
    ///
    /// Arrays can be nested, and the length of the array is the number of elements in the
    /// array, not the number of bytes.
    Array,
}
141
142#[derive(Debug, Clone)]
143pub enum Value {
144    U8(u8),
145    I8(i8),
146    U16(u16),
147    I16(i16),
148    U32(u32),
149    I32(i32),
150    U64(u64),
151    I64(i64),
152    F32(f32),
153    F64(f64),
154    Bool(bool),
155    String(String),
156    Array(Vec<Value>),
157}
158
159impl Value {
160    pub fn value_type(&self) -> ValueType {
161        match self {
162            Self::U8(_) => ValueType::U8,
163            Self::I8(_) => ValueType::I8,
164            Self::U16(_) => ValueType::U16,
165            Self::I16(_) => ValueType::I16,
166            Self::U32(_) => ValueType::U32,
167            Self::I32(_) => ValueType::I32,
168            Self::U64(_) => ValueType::U64,
169            Self::I64(_) => ValueType::I64,
170            Self::F32(_) => ValueType::F32,
171            Self::F64(_) => ValueType::F64,
172            Self::Bool(_) => ValueType::Bool,
173            Self::String(_) => ValueType::String,
174            Self::Array(_) => ValueType::Array,
175        }
176    }
177
178    pub fn to_u8(&self) -> Result<u8> {
179        match self {
180            Self::U8(v) => Ok(*v),
181            v => crate::bail!("not a u8 {v:?}"),
182        }
183    }
184
185    pub fn to_i8(&self) -> Result<i8> {
186        match self {
187            Self::I8(v) => Ok(*v),
188            v => crate::bail!("not a i8 {v:?}"),
189        }
190    }
191
192    pub fn to_u16(&self) -> Result<u16> {
193        match self {
194            Self::U16(v) => Ok(*v),
195            v => crate::bail!("not a u16 {v:?}"),
196        }
197    }
198
199    pub fn to_i16(&self) -> Result<i16> {
200        match self {
201            Self::I16(v) => Ok(*v),
202            v => crate::bail!("not a i16 {v:?}"),
203        }
204    }
205
206    pub fn to_u32(&self) -> Result<u32> {
207        match self {
208            Self::U32(v) => Ok(*v),
209            v => crate::bail!("not a u32 {v:?}"),
210        }
211    }
212
213    pub fn to_i32(&self) -> Result<i32> {
214        match self {
215            Self::I32(v) => Ok(*v),
216            v => crate::bail!("not a i32 {v:?}"),
217        }
218    }
219
220    /// This will also automatically upcast any integral types which will not truncate.
221    pub fn to_u64(&self) -> Result<u64> {
222        match self {
223            Self::U64(v) => Ok(*v),
224            // Autoupcast cases here
225            Self::U8(v) => Ok(*v as u64),
226            Self::U16(v) => Ok(*v as u64),
227            Self::U32(v) => Ok(*v as u64),
228            Self::Bool(v) => Ok(*v as u64),
229            v => crate::bail!("not a u64 or upcastable to u64 {v:?}"),
230        }
231    }
232
233    pub fn to_i64(&self) -> Result<i64> {
234        match self {
235            Self::I64(v) => Ok(*v),
236            v => crate::bail!("not a i64 {v:?}"),
237        }
238    }
239
240    pub fn to_f32(&self) -> Result<f32> {
241        match self {
242            Self::F32(v) => Ok(*v),
243            v => crate::bail!("not a f32 {v:?}"),
244        }
245    }
246
247    pub fn to_f64(&self) -> Result<f64> {
248        match self {
249            Self::F64(v) => Ok(*v),
250            v => crate::bail!("not a f64 {v:?}"),
251        }
252    }
253
254    pub fn to_bool(&self) -> Result<bool> {
255        match self {
256            Self::Bool(v) => Ok(*v),
257            v => crate::bail!("not a bool {v:?}"),
258        }
259    }
260
261    pub fn to_vec(&self) -> Result<&Vec<Value>> {
262        match self {
263            Self::Array(v) => Ok(v),
264            v => crate::bail!("not a vec {v:?}"),
265        }
266    }
267
268    pub fn to_string(&self) -> Result<&String> {
269        match self {
270            Self::String(v) => Ok(v),
271            v => crate::bail!("not a string {v:?}"),
272        }
273    }
274
275    fn read<R: std::io::Read>(
276        reader: &mut R,
277        value_type: ValueType,
278        magic: &VersionedMagic,
279    ) -> Result<Self> {
280        let v = match value_type {
281            ValueType::U8 => Self::U8(reader.read_u8()?),
282            ValueType::I8 => Self::I8(reader.read_i8()?),
283            ValueType::U16 => Self::U16(reader.read_u16::<LittleEndian>()?),
284            ValueType::I16 => Self::I16(reader.read_i16::<LittleEndian>()?),
285            ValueType::U32 => Self::U32(reader.read_u32::<LittleEndian>()?),
286            ValueType::I32 => Self::I32(reader.read_i32::<LittleEndian>()?),
287            ValueType::U64 => Self::U64(reader.read_u64::<LittleEndian>()?),
288            ValueType::I64 => Self::I64(reader.read_i64::<LittleEndian>()?),
289            ValueType::F32 => Self::F32(reader.read_f32::<LittleEndian>()?),
290            ValueType::F64 => Self::F64(reader.read_f64::<LittleEndian>()?),
291            ValueType::Bool => match reader.read_u8()? {
292                0 => Self::Bool(false),
293                1 => Self::Bool(true),
294                b => crate::bail!("unexpected bool value {b}"),
295            },
296            ValueType::String => Self::String(read_string(reader, magic)?),
297            ValueType::Array => {
298                let value_type = reader.read_u32::<LittleEndian>()?;
299                let value_type = ValueType::from_u32(value_type)?;
300                let len = match magic {
301                    VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
302                    VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
303                        reader.read_u64::<LittleEndian>()? as usize
304                    }
305                };
306                let mut vs = Vec::with_capacity(len);
307                for _ in 0..len {
308                    vs.push(Value::read(reader, value_type, magic)?)
309                }
310                Self::Array(vs)
311            }
312        };
313        Ok(v)
314    }
315
316    fn write<W: std::io::Write>(&self, w: &mut W) -> Result<()> {
317        match self {
318            &Self::U8(v) => w.write_u8(v)?,
319            &Self::I8(v) => w.write_i8(v)?,
320            &Self::U16(v) => w.write_u16::<LittleEndian>(v)?,
321            &Self::I16(v) => w.write_i16::<LittleEndian>(v)?,
322            &Self::U32(v) => w.write_u32::<LittleEndian>(v)?,
323            &Self::I32(v) => w.write_i32::<LittleEndian>(v)?,
324            &Self::U64(v) => w.write_u64::<LittleEndian>(v)?,
325            &Self::I64(v) => w.write_i64::<LittleEndian>(v)?,
326            &Self::F32(v) => w.write_f32::<LittleEndian>(v)?,
327            &Self::F64(v) => w.write_f64::<LittleEndian>(v)?,
328            &Self::Bool(v) => w.write_u8(u8::from(v))?,
329            Self::String(v) => write_string(w, v.as_str())?,
330            Self::Array(v) => {
331                // The `Value` type does not enforce that all the values in an Array have the same
332                // type.
333                let value_type = if v.is_empty() {
334                    // Doesn't matter, the array is empty.
335                    ValueType::U32
336                } else {
337                    let value_type: std::collections::HashSet<_> =
338                        v.iter().map(|elem| elem.value_type()).collect();
339                    if value_type.len() != 1 {
340                        crate::bail!("multiple value-types in the same array {value_type:?}")
341                    }
342                    value_type.into_iter().next().context("empty value_type")?
343                };
344                w.write_u32::<LittleEndian>(value_type.to_u32())?;
345                w.write_u64::<LittleEndian>(v.len() as u64)?;
346                for elem in v.iter() {
347                    elem.write(w)?
348                }
349            }
350        }
351        Ok(())
352    }
353}
354
355impl ValueType {
356    fn from_u32(v: u32) -> Result<Self> {
357        let v = match v {
358            0 => Self::U8,
359            1 => Self::I8,
360            2 => Self::U16,
361            3 => Self::I16,
362            4 => Self::U32,
363            5 => Self::I32,
364            6 => Self::F32,
365            7 => Self::Bool,
366            8 => Self::String,
367            9 => Self::Array,
368            10 => Self::U64,
369            11 => Self::I64,
370            12 => Self::F64,
371            v => crate::bail!("unrecognized value-type {v:#08x}"),
372        };
373        Ok(v)
374    }
375
376    fn to_u32(self) -> u32 {
377        match self {
378            Self::U8 => 0,
379            Self::I8 => 1,
380            Self::U16 => 2,
381            Self::I16 => 3,
382            Self::U32 => 4,
383            Self::I32 => 5,
384            Self::F32 => 6,
385            Self::Bool => 7,
386            Self::String => 8,
387            Self::Array => 9,
388            Self::U64 => 10,
389            Self::I64 => 11,
390            Self::F64 => 12,
391        }
392    }
393}
394
395impl Content {
396    pub fn read<R: std::io::Seek + std::io::Read>(reader: &mut R) -> Result<Self> {
397        let magic = VersionedMagic::read(reader)?;
398
399        let tensor_count = match magic {
400            VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
401            VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
402                reader.read_u64::<LittleEndian>()? as usize
403            }
404        };
405        let metadata_kv_count = match magic {
406            VersionedMagic::GgufV1 => reader.read_u32::<LittleEndian>()? as usize,
407            VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
408                reader.read_u64::<LittleEndian>()? as usize
409            }
410        };
411
412        let mut metadata = HashMap::new();
413        for _idx in 0..metadata_kv_count {
414            let key = read_string(reader, &magic)?;
415            let value_type = reader.read_u32::<LittleEndian>()?;
416            let value_type = ValueType::from_u32(value_type)?;
417            let value = Value::read(reader, value_type, &magic)?;
418            metadata.insert(key, value);
419        }
420        let mut tensor_infos = HashMap::new();
421        for _idx in 0..tensor_count {
422            let tensor_name = read_string(reader, &magic)?;
423            let n_dimensions = reader.read_u32::<LittleEndian>()?;
424
425            let mut dimensions: Vec<usize> = match magic {
426                VersionedMagic::GgufV1 => {
427                    let mut dimensions = vec![0; n_dimensions as usize];
428                    reader.read_u32_into::<LittleEndian>(&mut dimensions)?;
429                    dimensions.into_iter().map(|c| c as usize).collect()
430                }
431                VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => {
432                    let mut dimensions = vec![0; n_dimensions as usize];
433                    reader.read_u64_into::<LittleEndian>(&mut dimensions)?;
434                    dimensions.into_iter().map(|c| c as usize).collect()
435                }
436            };
437
438            dimensions.reverse();
439            let ggml_dtype = reader.read_u32::<LittleEndian>()?;
440            let ggml_dtype = GgmlDType::from_u32(ggml_dtype)?;
441            let offset = reader.read_u64::<LittleEndian>()?;
442            tensor_infos.insert(
443                tensor_name,
444                TensorInfo {
445                    shape: crate::Shape::from(dimensions),
446                    offset,
447                    ggml_dtype,
448                },
449            );
450        }
451        let position = reader.stream_position()?;
452        let alignment = match metadata.get("general.alignment") {
453            Some(Value::U8(v)) => *v as u64,
454            Some(Value::U16(v)) => *v as u64,
455            Some(Value::U32(v)) => *v as u64,
456            Some(Value::I8(v)) if *v >= 0 => *v as u64,
457            Some(Value::I16(v)) if *v >= 0 => *v as u64,
458            Some(Value::I32(v)) if *v >= 0 => *v as u64,
459            _ => DEFAULT_ALIGNMENT,
460        };
461        let tensor_data_offset = position.div_ceil(alignment) * alignment;
462        Ok(Self {
463            magic,
464            metadata,
465            tensor_infos,
466            tensor_data_offset,
467        })
468    }
469
470    pub fn tensor<R: std::io::Seek + std::io::Read>(
471        &self,
472        reader: &mut R,
473        name: &str,
474        device: &Device,
475    ) -> Result<QTensor> {
476        let tensor_info = match self.tensor_infos.get(name) {
477            Some(tensor_info) => tensor_info,
478            None => crate::bail!("cannot find tensor info for {name}"),
479        };
480        tensor_info.read(reader, self.tensor_data_offset, device)
481    }
482}
483
484fn write_string<W: std::io::Write>(w: &mut W, str: &str) -> Result<()> {
485    let bytes = str.as_bytes();
486    w.write_u64::<LittleEndian>(bytes.len() as u64)?;
487    w.write_all(bytes)?;
488    Ok(())
489}
490
491pub fn write<W: std::io::Seek + std::io::Write>(
492    w: &mut W,
493    metadata: &[(&str, &Value)],
494    tensors: &[(&str, &QTensor)],
495) -> Result<()> {
496    w.write_u32::<LittleEndian>(0x46554747)?;
497    w.write_u32::<LittleEndian>(2)?; // version 2.
498    w.write_u64::<LittleEndian>(tensors.len() as u64)?;
499    w.write_u64::<LittleEndian>(metadata.len() as u64)?;
500    for (name, value) in metadata.iter() {
501        write_string(w, name)?;
502        w.write_u32::<LittleEndian>(value.value_type().to_u32())?;
503        value.write(w)?;
504    }
505    let mut offset = 0usize;
506    let mut offsets = Vec::with_capacity(tensors.len());
507    for (name, tensor) in tensors.iter() {
508        write_string(w, name)?;
509        let dims = tensor.shape().dims();
510        w.write_u32::<LittleEndian>(dims.len() as u32)?;
511        for &dim in dims.iter().rev() {
512            w.write_u64::<LittleEndian>(dim as u64)?;
513        }
514        w.write_u32::<LittleEndian>(tensor.dtype().to_u32())?;
515        w.write_u64::<LittleEndian>(offset as u64)?;
516        offsets.push(offset);
517        let size_in_bytes = tensor.storage_size_in_bytes();
518        let padding = 31 - (31 + size_in_bytes) % 32;
519        offset += size_in_bytes + padding;
520    }
521    let pos = w.stream_position()? as usize;
522    let padding = 31 - (31 + pos) % 32;
523    w.write_all(&vec![0u8; padding])?;
524    let tensor_start_pos = w.stream_position()? as usize;
525    for (offset, (_name, tensor)) in offsets.iter().zip(tensors.iter()) {
526        let pos = w.stream_position()? as usize;
527        if tensor_start_pos + offset != pos {
528            crate::bail!(
529                "internal error, unexpected current position {tensor_start_pos} {offset} {pos}"
530            )
531        }
532        let data = tensor.data()?;
533        let size_in_bytes = data.len();
534        w.write_all(&data)?;
535        let padding = 31 - (31 + size_in_bytes) % 32;
536        w.write_all(&vec![0u8; padding])?;
537    }
538    Ok(())
539}