use std::collections::HashMap;
use std::io::{Read, Seek, SeekFrom};
/// Magic bytes expected at the very start of a GGUF stream: ASCII `GGUF`.
const GGUF_MAGIC: [u8; 4] = [0x47, 0x47, 0x55, 0x46];
// Numeric tags that precede each metadata value. `read_value` matches on these
// to decide how to decode the bytes that follow the tag.
/// Tag for an unsigned 8-bit integer value.
const GGUF_TYPE_UINT8: u32 = 0;
/// Tag for a signed 8-bit integer value.
const GGUF_TYPE_INT8: u32 = 1;
/// Tag for an unsigned 16-bit integer value.
const GGUF_TYPE_UINT16: u32 = 2;
/// Tag for a signed 16-bit integer value.
const GGUF_TYPE_INT16: u32 = 3;
/// Tag for an unsigned 32-bit integer value.
const GGUF_TYPE_UINT32: u32 = 4;
/// Tag for a signed 32-bit integer value.
const GGUF_TYPE_INT32: u32 = 5;
/// Tag for a 32-bit float value.
const GGUF_TYPE_FLOAT32: u32 = 6;
/// Tag for a boolean value, stored as one byte (non-zero means `true`).
const GGUF_TYPE_BOOL: u32 = 7;
/// Tag for a length-prefixed UTF-8 string value.
const GGUF_TYPE_STRING: u32 = 8;
/// Tag for an array value: element type tag, element count, then the elements.
const GGUF_TYPE_ARRAY: u32 = 9;
/// Tag for an unsigned 64-bit integer value.
const GGUF_TYPE_UINT64: u32 = 10;
/// Tag for a signed 64-bit integer value.
const GGUF_TYPE_INT64: u32 = 11;
/// Tag for a 64-bit float value.
const GGUF_TYPE_FLOAT64: u32 = 12;
/// A decoded GGUF metadata value.
///
/// Each variant corresponds to one of the `GGUF_TYPE_*` tags handled by
/// `read_value`; the payload is the value after little-endian decoding.
#[derive(Debug, Clone)]
pub enum GgufValue {
/// Unsigned 8-bit integer.
U8(u8),
/// Signed 8-bit integer.
I8(i8),
/// Unsigned 16-bit integer.
U16(u16),
/// Signed 16-bit integer.
I16(i16),
/// Unsigned 32-bit integer.
U32(u32),
/// Signed 32-bit integer.
I32(i32),
/// 32-bit float.
F32(f32),
/// Boolean (decoded from a single byte; non-zero is `true`).
Bool(bool),
/// UTF-8 string.
Str(String),
/// Unsigned 64-bit integer.
U64(u64),
/// Signed 64-bit integer.
I64(i64),
/// 64-bit float.
F64(f64),
/// Array of values that all share one element type; the parser rejects
/// arrays whose element type is itself an array.
Array(Vec<GgufValue>),
}
impl GgufValue {
pub fn as_u64(&self) -> Option<u64> {
match self {
GgufValue::U8(v) => Some(*v as u64),
GgufValue::U16(v) => Some(*v as u64),
GgufValue::U32(v) => Some(*v as u64),
GgufValue::U64(v) => Some(*v),
GgufValue::I8(v) if *v >= 0 => Some(*v as u64),
GgufValue::I16(v) if *v >= 0 => Some(*v as u64),
GgufValue::I32(v) if *v >= 0 => Some(*v as u64),
GgufValue::I64(v) if *v >= 0 => Some(*v as u64),
_ => None,
}
}
pub fn as_str(&self) -> Option<&str> {
if let GgufValue::Str(s) = self {
Some(s.as_str())
} else {
None
}
}
pub fn as_f32(&self) -> Option<f32> {
match self {
GgufValue::F32(v) => Some(*v),
GgufValue::F64(v) => Some(*v as f32),
_ => None,
}
}
}
/// Architecture hyper-parameters gathered from the metadata key/value table.
///
/// Populated by `extract_arch`. Numeric fields are looked up as
/// `<architecture>.<key>` first and as the bare `<key>` second; a field is
/// `None` when no key is found or the value is not a non-negative integer.
#[derive(Debug, Clone, Default)]
pub struct GgufModelArch {
/// Value of `general.architecture`, when present and a string.
pub architecture: Option<String>,
/// Value of the `context_length` key.
pub context_length: Option<u64>,
/// Value of the `embedding_length` key.
pub embedding_length: Option<u64>,
/// Value of the `feed_forward_length` key.
pub feed_forward_length: Option<u64>,
/// Value of the `attention.head_count` key.
pub head_count: Option<u64>,
/// Value of the `attention.head_count_kv` key.
pub head_count_kv: Option<u64>,
/// Value of the `block_count` key.
pub layer_count: Option<u64>,
/// Value of the `rope.dimension_count` key.
pub rope_dimension_count: Option<u64>,
/// Length of the `tokenizer.ggml.token_type` array when that key holds an
/// array; otherwise the value of the `vocab_size` key.
pub vocab_size: Option<u64>,
}
/// Description of one tensor as recorded in the tensor-info table.
#[derive(Debug, Clone)]
pub struct GgufTensorInfo {
/// Tensor name as stored in the file.
pub name: String,
/// Dimension sizes in the order they appear in the file.
pub dims: Vec<u64>,
/// Raw numeric type id read from the file; `ggml_bytes_per_element`
/// interprets it when estimating sizes.
pub data_type: u32,
/// Offset value read from the file, stored as-is.
/// NOTE(review): presumably relative to the start of the tensor data
/// section — confirm against the GGUF specification.
pub offset: u64,
/// Product of `dims`, clamped to a minimum of 1 by the parser.
pub param_count: u64,
}
/// Everything `GgufParser::parse` extracts from a GGUF stream: header fields,
/// the metadata key/value table, and the tensor-info table.
#[derive(Debug, Clone)]
pub struct GgufMetadata {
/// Format version from the header (the parser accepts 2 and 3).
pub version: u32,
/// Tensor count as declared in the header.
pub n_tensors: u64,
/// Metadata entries keyed by name; a repeated key keeps the last value read.
pub kv: HashMap<String, GgufValue>,
/// One entry per tensor, in file order.
pub tensors: Vec<GgufTensorInfo>,
/// Architecture parameters derived from `kv`.
pub arch: GgufModelArch,
/// Total stream length found by seeking to the end, or `None` if that
/// seek failed.
pub file_size_bytes: Option<u64>,
}
impl GgufMetadata {
    /// Total number of parameters across all tensors.
    ///
    /// The sum saturates at `u64::MAX` rather than overflowing, because
    /// `param_count` is derived from dimensions supplied by the file.
    pub fn total_params(&self) -> u64 {
        self.tensors
            .iter()
            .fold(0u64, |total, t| total.saturating_add(t.param_count))
    }

    /// Rough size of all tensor data in bytes.
    ///
    /// Each tensor contributes `param_count * ggml_bytes_per_element(data_type)`.
    /// Both the per-tensor product and the running total saturate at
    /// `u64::MAX`, so malformed metadata cannot trigger an overflow panic.
    pub fn estimated_size_bytes(&self) -> u64 {
        self.tensors.iter().fold(0u64, |total, t| {
            let bpe: u64 = ggml_bytes_per_element(t.data_type);
            total.saturating_add(t.param_count.saturating_mul(bpe))
        })
    }

    /// Returns references to every tensor whose name starts with `prefix`,
    /// in file order. An empty prefix matches all tensors.
    pub fn tensors_with_prefix(&self, prefix: &str) -> Vec<&GgufTensorInfo> {
        self.tensors
            .iter()
            .filter(|t| t.name.starts_with(prefix))
            .collect()
    }
}
/// Coarse bytes-per-element figure for a tensor type id, used only for size
/// estimation.
///
/// Ids 1, 16, 17 and 19 map to 2 bytes; ids 2, 3 and 6 through 15 map to 1
/// byte; every other id (including 0, 18, 20 and unknown ids) maps to 4.
/// NOTE(review): these look like GGML tensor type ids with block-quantized
/// formats rounded to a whole byte — confirm against the ggml type table.
fn ggml_bytes_per_element(data_type: u32) -> u64 {
    match data_type {
        1 | 16 | 17 | 19 => 2,
        2 | 3 | 6..=15 => 1,
        // 0, 18 and 20 are 4 bytes, as is the fallback for any other id.
        _ => 4,
    }
}
/// Errors that can occur while parsing a GGUF stream.
#[derive(Debug, thiserror::Error)]
pub enum GgufParseError {
/// An underlying I/O operation failed for a reason other than hitting
/// end-of-file in the middle of a read.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// The first four bytes did not match `GGUF_MAGIC`.
#[error("invalid GGUF magic bytes")]
InvalidMagic,
/// The header declared a version other than 2 or 3.
#[error("unsupported GGUF version: {0}")]
UnsupportedVersion(u32),
/// A length-prefixed string was not valid UTF-8.
#[error("invalid UTF-8 in key/name: {0}")]
InvalidUtf8(#[from] std::string::FromUtf8Error),
/// A metadata value carried a type tag that `read_value` does not know.
#[error("unknown value type: {0}")]
UnknownValueType(u32),
/// The stream ended before a complete field could be read.
#[error("truncated file")]
Truncated,
/// An array declared `GGUF_TYPE_ARRAY` as its element type.
#[error("nested GGUF arrays are not supported")]
NestedArrayUnsupported,
}
/// Fills `buf` completely from `r`.
///
/// # Errors
/// Returns `GgufParseError::Truncated` when the reader reports
/// `UnexpectedEof`, and `GgufParseError::Io` for any other I/O failure.
fn read_exact_or_truncated<R: Read>(r: &mut R, buf: &mut [u8]) -> Result<(), GgufParseError> {
    match r.read_exact(buf) {
        Ok(()) => Ok(()),
        Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => {
            Err(GgufParseError::Truncated)
        }
        Err(err) => Err(GgufParseError::Io(err)),
    }
}
/// Reads a single byte from `r`.
///
/// Errors are those of `read_exact_or_truncated`.
fn read_u8<R: Read>(r: &mut R) -> Result<u8, GgufParseError> {
    let mut byte = [0u8; 1];
    read_exact_or_truncated(r, &mut byte)?;
    let [value] = byte;
    Ok(value)
}
/// Reads one byte from `r` and reinterprets it as a two's-complement `i8`.
fn read_i8<R: Read>(r: &mut R) -> Result<i8, GgufParseError> {
    let raw = read_u8(r)?;
    Ok(i8::from_le_bytes([raw]))
}
/// Reads two bytes from `r` and decodes them as a little-endian `u16`.
fn read_u16_le<R: Read>(r: &mut R) -> Result<u16, GgufParseError> {
    let mut raw = [0u8; std::mem::size_of::<u16>()];
    match read_exact_or_truncated(r, &mut raw) {
        Ok(()) => Ok(u16::from_le_bytes(raw)),
        Err(err) => Err(err),
    }
}
/// Reads a little-endian 16-bit value from `r` and reinterprets its bits as `i16`.
fn read_i16_le<R: Read>(r: &mut R) -> Result<i16, GgufParseError> {
    let unsigned = read_u16_le(r)?;
    Ok(i16::from_le_bytes(unsigned.to_le_bytes()))
}
/// Reads four bytes from `r` and decodes them as a little-endian `u32`.
fn read_u32_le<R: Read>(r: &mut R) -> Result<u32, GgufParseError> {
    let mut raw = [0u8; std::mem::size_of::<u32>()];
    match read_exact_or_truncated(r, &mut raw) {
        Ok(()) => Ok(u32::from_le_bytes(raw)),
        Err(err) => Err(err),
    }
}
/// Reads a little-endian 32-bit value from `r` and reinterprets its bits as `i32`.
fn read_i32_le<R: Read>(r: &mut R) -> Result<i32, GgufParseError> {
    let unsigned = read_u32_le(r)?;
    Ok(i32::from_le_bytes(unsigned.to_le_bytes()))
}
/// Reads a little-endian 32-bit pattern from `r` and interprets it as an `f32`.
fn read_f32_le<R: Read>(r: &mut R) -> Result<f32, GgufParseError> {
    read_u32_le(r).map(f32::from_bits)
}
/// Reads eight bytes from `r` and decodes them as a little-endian `u64`.
fn read_u64_le<R: Read>(r: &mut R) -> Result<u64, GgufParseError> {
    let mut raw = [0u8; std::mem::size_of::<u64>()];
    match read_exact_or_truncated(r, &mut raw) {
        Ok(()) => Ok(u64::from_le_bytes(raw)),
        Err(err) => Err(err),
    }
}
/// Reads a little-endian 64-bit value from `r` and reinterprets its bits as `i64`.
fn read_i64_le<R: Read>(r: &mut R) -> Result<i64, GgufParseError> {
    let unsigned = read_u64_le(r)?;
    Ok(i64::from_le_bytes(unsigned.to_le_bytes()))
}
/// Reads a little-endian 64-bit pattern from `r` and interprets it as an `f64`.
fn read_f64_le<R: Read>(r: &mut R) -> Result<f64, GgufParseError> {
    read_u64_le(r).map(f64::from_bits)
}
fn read_gguf_string<R: Read>(r: &mut R) -> Result<String, GgufParseError> {
let len = read_u64_le(r)? as usize;
let mut buf = vec![0u8; len];
read_exact_or_truncated(r, &mut buf)?;
Ok(String::from_utf8(buf)?)
}
pub struct GgufParser;
impl GgufParser {
pub fn parse<R: Read + Seek>(reader: &mut R) -> Result<GgufMetadata, GgufParseError> {
let file_size_bytes = reader.seek(SeekFrom::End(0)).ok().map(|sz| {
let _ = reader.seek(SeekFrom::Start(0));
sz
});
reader
.seek(SeekFrom::Start(0))
.map_err(GgufParseError::Io)?;
let mut magic = [0u8; 4];
read_exact_or_truncated(reader, &mut magic)?;
if magic != GGUF_MAGIC {
return Err(GgufParseError::InvalidMagic);
}
let version = read_u32_le(reader)?;
if version != 2 && version != 3 {
return Err(GgufParseError::UnsupportedVersion(version));
}
let n_tensors = read_u64_le(reader)?;
let n_kv = read_u64_le(reader)?;
let mut kv: HashMap<String, GgufValue> = HashMap::with_capacity(n_kv as usize);
for _ in 0..n_kv {
let key = read_gguf_string(reader)?;
let value_type = read_u32_le(reader)?;
let value = read_value(reader, value_type)?;
kv.insert(key, value);
}
let mut tensors: Vec<GgufTensorInfo> = Vec::with_capacity(n_tensors as usize);
for _ in 0..n_tensors {
let name = read_gguf_string(reader)?;
let n_dims = read_u32_le(reader)?;
let mut dims = Vec::with_capacity(n_dims as usize);
for _ in 0..n_dims {
dims.push(read_u64_le(reader)?);
}
let data_type = read_u32_le(reader)?;
let offset = read_u64_le(reader)?;
let param_count = dims.iter().product::<u64>().max(1);
tensors.push(GgufTensorInfo {
name,
dims,
data_type,
offset,
param_count,
});
}
let arch = extract_arch(&kv);
Ok(GgufMetadata {
version,
n_tensors,
kv,
tensors,
arch,
file_size_bytes,
})
}
pub fn parse_file(path: &std::path::Path) -> Result<GgufMetadata, GgufParseError> {
let mut file = std::fs::File::open(path).map_err(GgufParseError::Io)?;
Self::parse(&mut file)
}
pub fn parse_bytes(bytes: &[u8]) -> Result<GgufMetadata, GgufParseError> {
let mut cursor = std::io::Cursor::new(bytes);
Self::parse(&mut cursor)
}
}
/// Decodes one metadata value of the given `value_type` tag from `reader`.
///
/// Scalars are read little-endian; booleans are a single byte where any
/// non-zero value is `true`. Arrays are encoded as an element type tag, a
/// `u64` element count, then the elements back to back.
///
/// The array count comes from the file and is untrusted, so it is used only
/// as a capped capacity hint. A bogus count on a short stream ends in
/// `Truncated` rather than a capacity-overflow panic.
///
/// # Errors
/// * `GgufParseError::UnknownValueType` for an unrecognized tag.
/// * `GgufParseError::NestedArrayUnsupported` if an array's element type is
///   itself the array tag.
/// * `GgufParseError::Truncated`, `Io` or `InvalidUtf8` from the underlying
///   readers.
fn read_value<R: Read>(reader: &mut R, value_type: u32) -> Result<GgufValue, GgufParseError> {
    // Cap on the number of array slots reserved before any element is read.
    const MAX_ARRAY_PREALLOC: u64 = 1024;

    match value_type {
        GGUF_TYPE_UINT8 => Ok(GgufValue::U8(read_u8(reader)?)),
        GGUF_TYPE_INT8 => Ok(GgufValue::I8(read_i8(reader)?)),
        GGUF_TYPE_UINT16 => Ok(GgufValue::U16(read_u16_le(reader)?)),
        GGUF_TYPE_INT16 => Ok(GgufValue::I16(read_i16_le(reader)?)),
        GGUF_TYPE_UINT32 => Ok(GgufValue::U32(read_u32_le(reader)?)),
        GGUF_TYPE_INT32 => Ok(GgufValue::I32(read_i32_le(reader)?)),
        GGUF_TYPE_FLOAT32 => Ok(GgufValue::F32(read_f32_le(reader)?)),
        GGUF_TYPE_BOOL => {
            let b = read_u8(reader)?;
            Ok(GgufValue::Bool(b != 0))
        }
        GGUF_TYPE_STRING => Ok(GgufValue::Str(read_gguf_string(reader)?)),
        GGUF_TYPE_ARRAY => {
            let elem_type = read_u32_le(reader)?;
            if elem_type == GGUF_TYPE_ARRAY {
                return Err(GgufParseError::NestedArrayUnsupported);
            }
            let count = read_u64_le(reader)?;
            let mut items = Vec::with_capacity(count.min(MAX_ARRAY_PREALLOC) as usize);
            for _ in 0..count {
                items.push(read_value(reader, elem_type)?);
            }
            Ok(GgufValue::Array(items))
        }
        GGUF_TYPE_UINT64 => Ok(GgufValue::U64(read_u64_le(reader)?)),
        GGUF_TYPE_INT64 => Ok(GgufValue::I64(read_i64_le(reader)?)),
        GGUF_TYPE_FLOAT64 => Ok(GgufValue::F64(read_f64_le(reader)?)),
        unknown => Err(GgufParseError::UnknownValueType(unknown)),
    }
}
/// Builds a [`GgufModelArch`] from the metadata key/value table.
///
/// The architecture name is taken from `general.architecture` when that key
/// holds a string. Numeric fields are looked up under `<arch>.<key>` (with
/// `llama` as the prefix when no architecture name is present). The bare
/// `<key>` is consulted only if the prefixed key is absent; a prefixed key
/// that exists but is not a non-negative integer yields `None`.
///
/// `vocab_size` is the length of the `tokenizer.ggml.token_type` array when
/// that key holds an array, and otherwise falls back to the `vocab_size` key
/// using the same lookup rule.
fn extract_arch(kv: &HashMap<String, GgufValue>) -> GgufModelArch {
    let architecture: Option<String> = match kv.get("general.architecture") {
        Some(GgufValue::Str(name)) => Some(name.clone()),
        _ => None,
    };
    let prefix: &str = architecture.as_deref().unwrap_or("llama");

    let lookup = |suffix: &str| -> Option<u64> {
        let namespaced = format!("{}.{}", prefix, suffix);
        let entry = match kv.get(namespaced.as_str()) {
            Some(found) => Some(found),
            None => kv.get(suffix),
        };
        entry.and_then(GgufValue::as_u64)
    };

    let context_length = lookup("context_length");
    let embedding_length = lookup("embedding_length");
    let feed_forward_length = lookup("feed_forward_length");
    let head_count = lookup("attention.head_count");
    let head_count_kv = lookup("attention.head_count_kv");
    let layer_count = lookup("block_count");
    let rope_dimension_count = lookup("rope.dimension_count");
    let vocab_size = match kv.get("tokenizer.ggml.token_type") {
        Some(GgufValue::Array(entries)) => Some(entries.len() as u64),
        _ => lookup("vocab_size"),
    };

    GgufModelArch {
        architecture,
        context_length,
        embedding_length,
        feed_forward_length,
        head_count,
        head_count_kv,
        layer_count,
        rope_dimension_count,
        vocab_size,
    }
}