use std::collections::HashMap;
/// Magic number of a GGUF file: the ASCII bytes `GGUF` read as a little-endian `u32`
/// (`0x47 0x47 0x55 0x46`).
pub const GGUF_MAGIC: u32 = 0x4655_4747;
/// Format version number 2.
pub const GGUF_VERSION_V2: u32 = 2;
/// Format version number 3.
pub const GGUF_VERSION_V3: u32 = 3;
// Raw tensor type ids. Each value equals the discriminant of the matching
// `GgmlQuantType` variant. There are no constants here for the enum's
// `Q8_1` (9), `IQ2XXS` (16) and `IQ2XS` (17) variants.
/// Type id of [`GgmlQuantType::F32`].
pub const GGUF_TYPE_F32: u32 = 0;
/// Type id of [`GgmlQuantType::F16`].
pub const GGUF_TYPE_F16: u32 = 1;
/// Type id of [`GgmlQuantType::Q4_0`].
pub const GGUF_TYPE_Q4_0: u32 = 2;
/// Type id of [`GgmlQuantType::Q4_1`].
pub const GGUF_TYPE_Q4_1: u32 = 3;
/// Type id of [`GgmlQuantType::Q5_0`].
pub const GGUF_TYPE_Q5_0: u32 = 6;
/// Type id of [`GgmlQuantType::Q5_1`].
pub const GGUF_TYPE_Q5_1: u32 = 7;
/// Type id of [`GgmlQuantType::Q8_0`].
pub const GGUF_TYPE_Q8_0: u32 = 8;
/// Type id of [`GgmlQuantType::Q2K`].
pub const GGUF_TYPE_Q2_K: u32 = 10;
/// Type id of [`GgmlQuantType::Q3K`].
pub const GGUF_TYPE_Q3_K: u32 = 11;
/// Type id of [`GgmlQuantType::Q4K`].
pub const GGUF_TYPE_Q4_K: u32 = 12;
/// Type id of [`GgmlQuantType::Q5K`].
pub const GGUF_TYPE_Q5_K: u32 = 13;
/// Type id of [`GgmlQuantType::Q6K`].
pub const GGUF_TYPE_Q6_K: u32 = 14;
/// Type id of [`GgmlQuantType::BF16`].
pub const GGUF_TYPE_BF16: u32 = 30;
/// Tensor storage / quantization formats, identified by a numeric type id.
///
/// The enum is `#[repr(u32)]` and every discriminant is the format's numeric type id, so
/// [`GgmlQuantType::as_id`] and [`GgmlQuantType::from_id`] are exact inverses for the ids
/// listed here. Ids without a variant (for example 4, 5 and 15) are rejected by `from_id`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum GgmlQuantType {
    F32 = 0,
    F16 = 1,
    Q4_0 = 2,
    Q4_1 = 3,
    Q5_0 = 6,
    Q5_1 = 7,
    Q8_0 = 8,
    Q8_1 = 9,
    Q2K = 10,
    Q3K = 11,
    Q4K = 12,
    Q5K = 13,
    Q6K = 14,
    IQ2XXS = 16,
    IQ2XS = 17,
    BF16 = 30,
}

impl GgmlQuantType {
    /// Converts a raw numeric type id into a variant.
    ///
    /// Returns `None` for any id that has no variant in this enum.
    #[must_use]
    pub const fn from_id(id: u32) -> Option<Self> {
        match id {
            0 => Some(Self::F32),
            1 => Some(Self::F16),
            2 => Some(Self::Q4_0),
            3 => Some(Self::Q4_1),
            6 => Some(Self::Q5_0),
            7 => Some(Self::Q5_1),
            8 => Some(Self::Q8_0),
            9 => Some(Self::Q8_1),
            10 => Some(Self::Q2K),
            11 => Some(Self::Q3K),
            12 => Some(Self::Q4K),
            13 => Some(Self::Q5K),
            14 => Some(Self::Q6K),
            16 => Some(Self::IQ2XXS),
            17 => Some(Self::IQ2XS),
            30 => Some(Self::BF16),
            _ => None,
        }
    }

    /// Returns the numeric type id (the enum discriminant) as a `u32`.
    #[must_use]
    pub const fn as_id(self) -> u32 {
        self as u32
    }

    /// Returns the numeric type id as a single byte.
    ///
    /// The cast cannot truncate: the largest discriminant in this enum is 30.
    #[must_use]
    pub const fn as_byte(self) -> u8 {
        self as u8
    }

    /// Returns the canonical upper-case name, e.g. `"Q4_K"` or `"IQ2_XXS"`.
    ///
    /// This is also the text produced by the `Display` implementation.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::F32 => "F32",
            Self::F16 => "F16",
            Self::Q4_0 => "Q4_0",
            Self::Q4_1 => "Q4_1",
            Self::Q5_0 => "Q5_0",
            Self::Q5_1 => "Q5_1",
            Self::Q8_0 => "Q8_0",
            Self::Q8_1 => "Q8_1",
            Self::Q2K => "Q2_K",
            Self::Q3K => "Q3_K",
            Self::Q4K => "Q4_K",
            Self::Q5K => "Q5_K",
            Self::Q6K => "Q6_K",
            Self::IQ2XXS => "IQ2_XXS",
            Self::IQ2XS => "IQ2_XS",
            Self::BF16 => "BF16",
        }
    }

    /// Parses a format name, ignoring ASCII case.
    ///
    /// Accepts exactly the names returned by [`GgmlQuantType::as_str`] in any mix of upper
    /// and lower case (`"Q4_K"`, `"q4_k"`, `"Q4_k"`, ...). No trimming or other
    /// normalisation is performed; anything else yields `None`.
    #[must_use]
    pub fn from_str_lossy(s: &str) -> Option<Self> {
        // Comparing against `as_str` keeps a single source of truth for the names, so a
        // spelling can never be accepted here that `as_str`/`Display` would not produce.
        const ALL: [GgmlQuantType; 16] = [
            GgmlQuantType::F32,
            GgmlQuantType::F16,
            GgmlQuantType::BF16,
            GgmlQuantType::Q4_0,
            GgmlQuantType::Q4_1,
            GgmlQuantType::Q5_0,
            GgmlQuantType::Q5_1,
            GgmlQuantType::Q8_0,
            GgmlQuantType::Q8_1,
            GgmlQuantType::Q2K,
            GgmlQuantType::Q3K,
            GgmlQuantType::Q4K,
            GgmlQuantType::Q5K,
            GgmlQuantType::Q6K,
            GgmlQuantType::IQ2XXS,
            GgmlQuantType::IQ2XS,
        ];
        ALL.iter()
            .copied()
            .find(|ty| s.eq_ignore_ascii_case(ty.as_str()))
    }
}

/// Formats the type as its canonical name (see [`GgmlQuantType::as_str`]).
impl std::fmt::Display for GgmlQuantType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
/// Inline capacity (element count) of [`TokenBuffer`].
pub const TOKEN_BUFFER_INLINE_CAP: usize = 32;
/// Inline capacity (element count) of [`AttentionBuffer`].
pub const ATTENTION_BUFFER_INLINE_CAP: usize = 64;
/// Inline capacity (element count) of [`HiddenBuffer`].
pub const HIDDEN_BUFFER_INLINE_CAP: usize = 128;
// NOTE(review): the three sizes below are not used anywhere in this file. The
// tests call them "watermarks"; the unit (bytes vs. elements) and the code that
// consumes them are not visible here — confirm before relying on them.
/// Low-watermark buffer size.
pub const BUFFER_LW_SIZE: usize = 1024;
/// High-watermark buffer size.
pub const BUFFER_HW_SIZE: usize = 8 * 1024;
/// Maximum buffer size.
pub const BUFFER_MAX_SIZE: usize = 32 * 1024;
/// `SmallVec` of `u32` values with room for [`TOKEN_BUFFER_INLINE_CAP`] elements inline.
pub type TokenBuffer = smallvec::SmallVec<[u32; TOKEN_BUFFER_INLINE_CAP]>;
/// `SmallVec` of `f32` values with room for [`ATTENTION_BUFFER_INLINE_CAP`] elements inline.
pub type AttentionBuffer = smallvec::SmallVec<[f32; ATTENTION_BUFFER_INLINE_CAP]>;
/// `SmallVec` of `f32` values with room for [`HIDDEN_BUFFER_INLINE_CAP`] elements inline.
pub type HiddenBuffer = smallvec::SmallVec<[f32; HIDDEN_BUFFER_INLINE_CAP]>;
/// Alignment constant for the GGUF layout; a power of two.
// NOTE(review): presumably a byte alignment applied to the tensor data section — confirm against the parser.
pub const GGUF_ALIGNMENT: usize = 32;
/// A dynamically typed value, as stored in [`GGUFModel::metadata`].
///
/// Only `PartialEq` (not `Eq`) is derived because the float variants hold `f32`/`f64`.
// NOTE(review): the variant order looks like it mirrors the GGUF metadata value-type
// numbering (UInt8 first, Float64 last) — confirm against the parser before relying on it.
#[derive(Debug, Clone, PartialEq)]
pub enum GGUFValue {
/// Unsigned 8-bit integer.
UInt8(u8),
/// Signed 8-bit integer.
Int8(i8),
/// Unsigned 16-bit integer.
UInt16(u16),
/// Signed 16-bit integer.
Int16(i16),
/// Unsigned 32-bit integer.
UInt32(u32),
/// Signed 32-bit integer.
Int32(i32),
/// 32-bit float.
Float32(f32),
/// Boolean.
Bool(bool),
/// Owned string.
String(String),
/// List of values; elements are themselves `GGUFValue`s, so arrays can nest.
Array(Vec<GGUFValue>),
/// Unsigned 64-bit integer.
UInt64(u64),
/// Signed 64-bit integer.
Int64(i64),
/// 64-bit float.
Float64(f64),
}
/// Header fields of a GGUF file: magic, version and the two entry counts.
#[derive(Debug, Clone, PartialEq)]
pub struct GGUFHeader {
/// Magic number; the tests in this file use [`GGUF_MAGIC`].
// NOTE(review): validation against `GGUF_MAGIC` is presumably done by the parser, which is not shown here.
pub magic: u32,
/// Format version, e.g. [`GGUF_VERSION_V2`] or [`GGUF_VERSION_V3`].
pub version: u32,
/// Number of tensors declared by the header.
pub tensor_count: u64,
/// Number of metadata entries declared by the header.
pub metadata_count: u64,
}
/// Description of a single tensor: its name, shape, type id and data offset.
#[derive(Debug, Clone, PartialEq)]
pub struct TensorInfo {
/// Tensor name, e.g. `"model.layers.0.attn.wq"`.
pub name: String,
/// Number of dimensions.
// NOTE(review): presumably equal to `dims.len()`; nothing in this file enforces that.
pub n_dims: u32,
/// Size of each dimension.
pub dims: Vec<u64>,
/// Raw numeric type id (e.g. [`GGUF_TYPE_Q4_K`]); kept as a plain `u32` rather than a
/// [`GgmlQuantType`], so it can hold ids that enum does not know.
pub qtype: u32,
/// Offset of this tensor's data.
// NOTE(review): presumably in bytes and relative to `GGUFModel::tensor_data_start` — confirm against the parser.
pub offset: u64,
}
/// In-memory description of a GGUF model: header, metadata map and tensor descriptors.
///
/// Holds only descriptive information; no tensor payload is stored in this struct.
#[derive(Debug, Clone)]
pub struct GGUFModel {
/// The file header.
pub header: GGUFHeader,
/// Metadata entries keyed by name.
pub metadata: HashMap<String, GGUFValue>,
/// One descriptor per tensor.
pub tensors: Vec<TensorInfo>,
/// Position at which the tensor data section starts.
// NOTE(review): presumably a byte offset from the start of the file/buffer — confirm against the parser.
pub tensor_data_start: usize,
}
/// Unit tests for the constants, the `GgmlQuantType` conversions and the plain data types.
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `GgmlQuantType` variant, for exhaustive round-trip checks.
    const ALL_QUANT_TYPES: [GgmlQuantType; 16] = [
        GgmlQuantType::F32,
        GgmlQuantType::F16,
        GgmlQuantType::Q4_0,
        GgmlQuantType::Q4_1,
        GgmlQuantType::Q5_0,
        GgmlQuantType::Q5_1,
        GgmlQuantType::Q8_0,
        GgmlQuantType::Q8_1,
        GgmlQuantType::Q2K,
        GgmlQuantType::Q3K,
        GgmlQuantType::Q4K,
        GgmlQuantType::Q5K,
        GgmlQuantType::Q6K,
        GgmlQuantType::IQ2XXS,
        GgmlQuantType::IQ2XS,
        GgmlQuantType::BF16,
    ];

    #[test]
    fn test_magic_constant() {
        assert_eq!(GGUF_MAGIC, 0x4655_4747);
        // The magic is the ASCII tag "GGUF" stored little-endian.
        assert_eq!(&GGUF_MAGIC.to_le_bytes(), b"GGUF");
    }

    #[test]
    fn test_quantization_constants() {
        assert_eq!(GGUF_TYPE_F32, 0);
        assert_eq!(GGUF_TYPE_F16, 1);
        assert_eq!(GGUF_TYPE_Q4_0, 2);
        assert_eq!(GGUF_TYPE_Q8_0, 8);
        assert_eq!(GGUF_TYPE_Q4_K, 12);
        assert_eq!(GGUF_TYPE_Q6_K, 14);
    }

    /// The free-standing `GGUF_TYPE_*` constants must agree with the enum discriminants.
    #[test]
    fn test_quant_type_ids_match_constants() {
        let pairs = [
            (GGUF_TYPE_F32, GgmlQuantType::F32),
            (GGUF_TYPE_F16, GgmlQuantType::F16),
            (GGUF_TYPE_Q4_0, GgmlQuantType::Q4_0),
            (GGUF_TYPE_Q4_1, GgmlQuantType::Q4_1),
            (GGUF_TYPE_Q5_0, GgmlQuantType::Q5_0),
            (GGUF_TYPE_Q5_1, GgmlQuantType::Q5_1),
            (GGUF_TYPE_Q8_0, GgmlQuantType::Q8_0),
            (GGUF_TYPE_Q2_K, GgmlQuantType::Q2K),
            (GGUF_TYPE_Q3_K, GgmlQuantType::Q3K),
            (GGUF_TYPE_Q4_K, GgmlQuantType::Q4K),
            (GGUF_TYPE_Q5_K, GgmlQuantType::Q5K),
            (GGUF_TYPE_Q6_K, GgmlQuantType::Q6K),
            (GGUF_TYPE_BF16, GgmlQuantType::BF16),
        ];
        for &(id, ty) in &pairs {
            assert_eq!(ty.as_id(), id);
            assert_eq!(GgmlQuantType::from_id(id), Some(ty));
        }
    }

    #[test]
    fn test_quant_type_id_round_trip() {
        for &ty in &ALL_QUANT_TYPES {
            assert_eq!(GgmlQuantType::from_id(ty.as_id()), Some(ty));
            // `as_byte` must not lose information relative to `as_id`.
            assert_eq!(u32::from(ty.as_byte()), ty.as_id());
        }
    }

    #[test]
    fn test_quant_type_unknown_ids() {
        for &id in &[4, 5, 15, 18, 29, 31, u32::MAX] {
            assert_eq!(GgmlQuantType::from_id(id), None);
        }
    }

    #[test]
    fn test_quant_type_name_round_trip() {
        for &ty in &ALL_QUANT_TYPES {
            let name = ty.as_str();
            assert_eq!(GgmlQuantType::from_str_lossy(name), Some(ty));
            assert_eq!(
                GgmlQuantType::from_str_lossy(&name.to_ascii_lowercase()),
                Some(ty)
            );
            assert_eq!(ty.to_string(), name);
        }
    }

    #[test]
    fn test_quant_type_unknown_names() {
        assert_eq!(GgmlQuantType::from_str_lossy(""), None);
        assert_eq!(GgmlQuantType::from_str_lossy("Q4"), None);
        assert_eq!(GgmlQuantType::from_str_lossy("Q9_9"), None);
    }

    #[test]
    fn test_buffer_constants() {
        assert_eq!(TOKEN_BUFFER_INLINE_CAP, 32);
        assert_eq!(ATTENTION_BUFFER_INLINE_CAP, 64);
        assert_eq!(HIDDEN_BUFFER_INLINE_CAP, 128);
    }

    #[test]
    fn test_buffer_watermarks() {
        assert_eq!(BUFFER_LW_SIZE, 1024);
        assert_eq!(BUFFER_HW_SIZE, 8 * 1024);
        assert_eq!(BUFFER_MAX_SIZE, 32 * 1024);
    }

    #[test]
    fn test_version_constant() {
        assert_eq!(GGUF_VERSION_V2, 2);
        assert_eq!(GGUF_VERSION_V3, 3);
    }

    #[test]
    fn test_gguf_value_variants() {
        let uint8 = GGUFValue::UInt8(255);
        let string = GGUFValue::String("test".to_string());
        let array = GGUFValue::Array(vec![GGUFValue::UInt32(1), GGUFValue::UInt32(2)]);
        assert_eq!(uint8, GGUFValue::UInt8(255));
        assert_eq!(string, GGUFValue::String("test".to_string()));
        assert!(matches!(array, GGUFValue::Array(_)));
    }

    #[test]
    fn test_gguf_header() {
        let header = GGUFHeader {
            magic: GGUF_MAGIC,
            version: GGUF_VERSION_V3,
            tensor_count: 100,
            metadata_count: 50,
        };
        assert_eq!(header.magic, 0x4655_4747);
        assert_eq!(header.version, 3);
        assert_eq!(header.tensor_count, 100);
        assert_eq!(header.metadata_count, 50);
    }

    #[test]
    fn test_tensor_info() {
        let info = TensorInfo {
            name: "model.layers.0.attn.wq".to_string(),
            n_dims: 2,
            dims: vec![4096, 4096],
            qtype: GGUF_TYPE_Q4_K,
            offset: 1024,
        };
        assert_eq!(info.name, "model.layers.0.attn.wq");
        assert_eq!(info.n_dims, 2);
        assert_eq!(info.dims, vec![4096, 4096]);
        assert_eq!(info.qtype, GGUF_TYPE_Q4_K);
        assert_eq!(info.offset, 1024);
        // The raw id stored in `qtype` decodes to the matching enum variant.
        assert_eq!(GgmlQuantType::from_id(info.qtype), Some(GgmlQuantType::Q4K));
    }

    #[test]
    fn test_gguf_model() {
        // The header counts match the (empty) collections so the fixture is self-consistent.
        let model = GGUFModel {
            header: GGUFHeader {
                magic: GGUF_MAGIC,
                version: GGUF_VERSION_V3,
                tensor_count: 0,
                metadata_count: 0,
            },
            metadata: HashMap::new(),
            tensors: vec![],
            tensor_data_start: 128,
        };
        assert_eq!(model.header.magic, GGUF_MAGIC);
        assert!(model.tensors.is_empty());
        assert!(model.metadata.is_empty());
        assert_eq!(model.tensor_data_start, 128);
    }

    #[test]
    fn test_alignment_constant() {
        assert_eq!(GGUF_ALIGNMENT, 32);
        // Power-of-two check.
        assert_eq!(GGUF_ALIGNMENT & (GGUF_ALIGNMENT - 1), 0);
    }
}