/// GGUF magic number: the ASCII bytes `GGUF` interpreted as a little-endian `u32`
/// (numerically `0x4655_4747`).
pub const GGUF_MAGIC: u32 = u32::from_le_bytes(*b"GGUF");

/// Version number of GGUF format revision 1.
pub const GGUF_VERSION_V1: u32 = 1;

/// Version number of GGUF format revision 2.
pub const GGUF_VERSION_V2: u32 = 2;

/// Version number of GGUF format revision 3.
pub const GGUF_VERSION_V3: u32 = 3;

/// Default alignment value (32).
///
/// NOTE(review): presumably a byte alignment applied to tensor data when a file does
/// not specify its own — confirm against the GGUF specification.
pub const GGUF_DEFAULT_ALIGNMENT: usize = 32;
/// Type tag identifying the kind of a GGUF metadata value.
///
/// The enum is `#[repr(u32)]`; each variant's discriminant is the raw `u32` tag
/// that the `TryFrom<u32>` implementation for this type accepts. The tags form
/// the contiguous range `0..=12`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum GgufMetadataValueType {
/// Unsigned 8-bit integer (tag 0).
Uint8 = 0,
/// Signed 8-bit integer (tag 1).
Int8 = 1,
/// Unsigned 16-bit integer (tag 2).
Uint16 = 2,
/// Signed 16-bit integer (tag 3).
Int16 = 3,
/// Unsigned 32-bit integer (tag 4).
Uint32 = 4,
/// Signed 32-bit integer (tag 5).
Int32 = 5,
/// 32-bit floating-point number (tag 6).
Float32 = 6,
/// Boolean (tag 7).
Bool = 7,
/// String (tag 8).
String = 8,
/// Array (tag 9).
Array = 9,
// The 64-bit kinds are numbered after `Array`, so the tag order is not grouped by width.
/// Unsigned 64-bit integer (tag 10).
Uint64 = 10,
/// Signed 64-bit integer (tag 11).
Int64 = 11,
/// 64-bit floating-point number (tag 12).
Float64 = 12,
}
impl TryFrom<u32> for GgufMetadataValueType {
    /// The unrecognised raw tag, handed back unchanged.
    type Error = u32;

    /// Converts a raw `u32` type tag into the matching [`GgufMetadataValueType`].
    ///
    /// # Errors
    ///
    /// Returns `Err(value)` when `value` is not one of the tags `0..=12`.
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        let kind = match value {
            0 => Self::Uint8,
            1 => Self::Int8,
            2 => Self::Uint16,
            3 => Self::Int16,
            4 => Self::Uint32,
            5 => Self::Int32,
            6 => Self::Float32,
            7 => Self::Bool,
            8 => Self::String,
            9 => Self::Array,
            10 => Self::Uint64,
            11 => Self::Int64,
            12 => Self::Float64,
            // Anything else is not a known tag; report it verbatim.
            unknown => return Err(unknown),
        };
        Ok(kind)
    }
}
/// Type tag identifying a ggml tensor data format.
///
/// The enum is `#[repr(u32)]`; each variant's discriminant is the raw `u32` tag
/// that the `TryFrom<u32>` implementation for this type accepts. The numbering
/// is not contiguous: tags 4, 5 and 31 through 33 have no variant here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum GgmlType {
/// 32-bit float (tag 0).
F32 = 0,
/// 16-bit float (tag 1).
F16 = 1,
/// `Q4_0` quantized format (tag 2).
Q4_0 = 2,
/// `Q4_1` quantized format (tag 3).
Q4_1 = 3,
// Tags 4 and 5 have no variant in this enum.
// NOTE(review): presumably these belonged to formats removed upstream — confirm.
/// `Q5_0` quantized format (tag 6).
Q5_0 = 6,
/// `Q5_1` quantized format (tag 7).
Q5_1 = 7,
/// `Q8_0` quantized format (tag 8).
Q8_0 = 8,
/// `Q8_1` quantized format (tag 9).
Q8_1 = 9,
/// `Q2_K` quantized format (tag 10).
Q2K = 10,
/// `Q3_K` quantized format (tag 11).
Q3K = 11,
/// `Q4_K` quantized format (tag 12).
Q4K = 12,
/// `Q5_K` quantized format (tag 13).
Q5K = 13,
/// `Q6_K` quantized format (tag 14).
Q6K = 14,
/// `Q8_K` quantized format (tag 15).
Q8K = 15,
/// `IQ2_XXS` quantized format (tag 16).
IQ2XXS = 16,
/// `IQ2_XS` quantized format (tag 17).
IQ2XS = 17,
/// `IQ3_XXS` quantized format (tag 18).
IQ3XXS = 18,
/// `IQ1_S` quantized format (tag 19).
IQ1S = 19,
/// `IQ4_NL` quantized format (tag 20).
IQ4NL = 20,
/// `IQ3_S` quantized format (tag 21).
IQ3S = 21,
/// `IQ2_S` quantized format (tag 22).
IQ2S = 22,
/// `IQ4_XS` quantized format (tag 23).
IQ4XS = 23,
/// 8-bit integer (tag 24).
I8 = 24,
/// 16-bit integer (tag 25).
I16 = 25,
/// 32-bit integer (tag 26).
I32 = 26,
/// 64-bit integer (tag 27).
I64 = 27,
/// 64-bit float (tag 28).
F64 = 28,
/// `IQ1_M` quantized format (tag 29).
IQ1M = 29,
/// `BF16` 16-bit float format (tag 30).
BF16 = 30,
// Tags 31 through 33 have no variant in this enum.
/// `TQ1_0` quantized format (tag 34).
TQ1_0 = 34,
/// `TQ2_0` quantized format (tag 35).
TQ2_0 = 35,
}
impl TryFrom<u32> for GgmlType {
    /// The unrecognised raw tag, handed back unchanged.
    type Error = u32;

    /// Converts a raw `u32` type tag into the matching [`GgmlType`].
    ///
    /// # Errors
    ///
    /// Returns `Err(value)` when `value` has no corresponding variant. This
    /// includes the unassigned tags 4, 5 and 31 through 33 as well as anything
    /// above 35.
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        let ty = match value {
            0 => Self::F32,
            1 => Self::F16,
            2 => Self::Q4_0,
            3 => Self::Q4_1,
            // 4 and 5 are not assigned to any variant.
            6 => Self::Q5_0,
            7 => Self::Q5_1,
            8 => Self::Q8_0,
            9 => Self::Q8_1,
            10 => Self::Q2K,
            11 => Self::Q3K,
            12 => Self::Q4K,
            13 => Self::Q5K,
            14 => Self::Q6K,
            15 => Self::Q8K,
            16 => Self::IQ2XXS,
            17 => Self::IQ2XS,
            18 => Self::IQ3XXS,
            19 => Self::IQ1S,
            20 => Self::IQ4NL,
            21 => Self::IQ3S,
            22 => Self::IQ2S,
            23 => Self::IQ4XS,
            24 => Self::I8,
            25 => Self::I16,
            26 => Self::I32,
            27 => Self::I64,
            28 => Self::F64,
            29 => Self::IQ1M,
            30 => Self::BF16,
            // 31 through 33 are not assigned to any variant.
            34 => Self::TQ1_0,
            35 => Self::TQ2_0,
            unknown => return Err(unknown),
        };
        Ok(ty)
    }
}
impl GgmlType {
    /// Returns the number of elements stored in one block of this type.
    ///
    /// Scalar float and integer types hold one element per block; the
    /// quantized formats pack either 32 or 256 elements into each block.
    /// Together with [`GgmlType::type_size`] this gives the storage cost of a
    /// tensor: `element_count / block_size() * type_size()` bytes.
    pub const fn block_size(&self) -> usize {
        match self {
            // Unquantized types: every element is its own block.
            Self::F32 | Self::F16 | Self::BF16 | Self::F64 => 1,
            Self::I8 | Self::I16 | Self::I32 | Self::I64 => 1,
            // 32-element blocks.
            Self::Q4_0 | Self::Q4_1 | Self::Q5_0 | Self::Q5_1 | Self::Q8_0 | Self::Q8_1 => 32,
            // IQ4_NL is the one IQ format that uses 32-element blocks (ggml's
            // `QK4_NL`), so it must not be grouped with the 256-element IQ types.
            Self::IQ4NL => 32,
            // 256-element blocks.
            Self::Q2K | Self::Q3K | Self::Q4K | Self::Q5K | Self::Q6K | Self::Q8K => 256,
            Self::IQ2XXS
            | Self::IQ2XS
            | Self::IQ2S
            | Self::IQ3XXS
            | Self::IQ3S
            | Self::IQ4XS
            | Self::IQ1S
            | Self::IQ1M => 256,
            Self::TQ1_0 | Self::TQ2_0 => 256,
        }
    }

    /// Returns the size in bytes of one block of this type.
    ///
    /// For scalar types this is the size of a single element; for quantized
    /// formats it is the size of the packed block that holds
    /// [`GgmlType::block_size`] elements.
    pub const fn type_size(&self) -> usize {
        match self {
            Self::F32 => 4,
            Self::F16 | Self::BF16 => 2,
            Self::F64 => 8,
            Self::I8 => 1,
            Self::I16 => 2,
            Self::I32 => 4,
            Self::I64 => 8,
            Self::Q4_0 => 18,
            Self::Q4_1 => 20,
            Self::Q5_0 => 22,
            Self::Q5_1 => 24,
            Self::Q8_0 => 34,
            Self::Q8_1 => 36,
            Self::Q2K => 84,
            Self::Q3K => 110,
            Self::Q4K => 144,
            Self::Q5K => 176,
            Self::Q6K => 210,
            Self::Q8K => 292,
            Self::IQ2XXS => 66,
            Self::IQ2XS => 74,
            Self::IQ2S => 82,
            Self::IQ3XXS => 98,
            Self::IQ3S => 110,
            Self::IQ4XS => 136,
            // ggml's `block_iq4_nl`: a 2-byte f16 scale followed by 16 bytes of
            // packed 4-bit values for 32 elements (4.5 bits per weight).
            Self::IQ4NL => 18,
            Self::IQ1S => 50,
            Self::IQ1M => 56,
            Self::TQ1_0 => 54,
            Self::TQ2_0 => 66,
        }
    }
}