use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
use std::path::Path;
use crate::error::AnamnesisError;
/// File magic: the ASCII bytes `GGUF` at offset 0.
const GGUF_MAGIC: &[u8; 4] = b"GGUF";
/// `GGUF` magic interpreted as a little-endian u32 (the supported on-disk layout).
const GGUF_MAGIC_LE_U32: u32 = u32::from_le_bytes(*GGUF_MAGIC);
/// `GGUF` magic interpreted as a big-endian u32; used to detect byte-swapped files.
const GGUF_MAGIC_BE_U32: u32 = u32::from_be_bytes(*GGUF_MAGIC);
/// Tensor-data alignment (bytes) used when the `general.alignment` key is absent.
const DEFAULT_ALIGNMENT: u32 = 32;
// The caps below are defensive limits so that a corrupt or hostile header
// cannot drive huge allocations or unbounded loops. They are deliberately
// generous; real models stay far below them.
const MAX_TENSOR_COUNT: u64 = 1_000_000;
const MAX_KV_COUNT: u64 = 1_000_000;
const MAX_STRING_LEN: u64 = 16 * 1024 * 1024;
const MAX_ARRAY_LEN: u64 = 16_000_000;
const MAX_ARRAY_DEPTH: u32 = 4;
const MAX_TENSOR_DIMS: u32 = 8;
const MAX_TENSOR_NAME_LEN: u64 = 65_535;
const MAX_TENSOR_ELEMENTS: u64 = 1_000_000_000_000;
/// Upper bound on speculative `with_capacity` pre-allocations driven by
/// untrusted lengths; vectors still grow past this as real data arrives.
const PREALLOC_SOFT_CAP: usize = 256;
/// Number of `GgufType` variants; sizes the `seen` bitmap in `ParsedGguf::inspect`.
const GGUF_TYPE_COUNT: usize = 32;
/// A ggml tensor element type as stored in a GGUF tensor-info record.
///
/// The on-disk discriminants are decoded by [`GgufType::from_u32`]; the
/// Rust variant order here does not mirror those numeric values. Marked
/// `#[non_exhaustive]` because upstream ggml keeps adding types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
#[allow(non_camel_case_types)]
pub enum GgufType {
    F32,
    F16,
    Q4_0,
    Q4_1,
    Q5_0,
    Q5_1,
    Q8_0,
    Q8_1,
    Q2_K,
    Q3_K,
    Q4_K,
    Q5_K,
    Q6_K,
    Q8_K,
    IQ2_XXS,
    IQ2_XS,
    IQ3_XXS,
    IQ1_S,
    IQ4_NL,
    IQ3_S,
    IQ2_S,
    IQ4_XS,
    I8,
    I16,
    I32,
    I64,
    F64,
    IQ1_M,
    BF16,
    TQ1_0,
    TQ2_0,
    MXFP4,
}
impl GgufType {
    /// Decode the on-disk `ggml_type` discriminant.
    ///
    /// Gaps in the accepted values (e.g. 4, 5, 31–33, 36–38) are
    /// discriminants this parser does not recognise; they are rejected
    /// with `Unsupported` rather than guessed at.
    fn from_u32(value: u32) -> crate::Result<Self> {
        let ty = match value {
            0 => Self::F32,
            1 => Self::F16,
            2 => Self::Q4_0,
            3 => Self::Q4_1,
            6 => Self::Q5_0,
            7 => Self::Q5_1,
            8 => Self::Q8_0,
            9 => Self::Q8_1,
            10 => Self::Q2_K,
            11 => Self::Q3_K,
            12 => Self::Q4_K,
            13 => Self::Q5_K,
            14 => Self::Q6_K,
            15 => Self::Q8_K,
            16 => Self::IQ2_XXS,
            17 => Self::IQ2_XS,
            18 => Self::IQ3_XXS,
            19 => Self::IQ1_S,
            20 => Self::IQ4_NL,
            21 => Self::IQ3_S,
            22 => Self::IQ2_S,
            23 => Self::IQ4_XS,
            24 => Self::I8,
            25 => Self::I16,
            26 => Self::I32,
            27 => Self::I64,
            28 => Self::F64,
            29 => Self::IQ1_M,
            30 => Self::BF16,
            34 => Self::TQ1_0,
            35 => Self::TQ2_0,
            39 => Self::MXFP4,
            other => {
                return Err(AnamnesisError::Unsupported {
                    format: "GGUF".into(),
                    detail: format!("unknown ggml_type discriminant {other}"),
                });
            }
        };
        Ok(ty)
    }
    /// Number of elements per quantisation block.
    ///
    /// Plain scalar types store one element per "block"; the classic
    /// quant formats use 32-element blocks and the remaining quantised
    /// families use 256-element blocks.
    #[must_use]
    pub const fn block_size(self) -> usize {
        match self {
            Self::F32
            | Self::F16
            | Self::BF16
            | Self::F64
            | Self::I8
            | Self::I16
            | Self::I32
            | Self::I64 => 1,
            Self::Q4_0
            | Self::Q4_1
            | Self::Q5_0
            | Self::Q5_1
            | Self::Q8_0
            | Self::Q8_1
            | Self::IQ4_NL
            | Self::MXFP4 => 32,
            Self::Q2_K
            | Self::Q3_K
            | Self::Q4_K
            | Self::Q5_K
            | Self::Q6_K
            | Self::Q8_K
            | Self::IQ2_XXS
            | Self::IQ2_XS
            | Self::IQ3_XXS
            | Self::IQ1_S
            | Self::IQ3_S
            | Self::IQ2_S
            | Self::IQ4_XS
            | Self::IQ1_M
            | Self::TQ1_0
            | Self::TQ2_0 => 256,
        }
    }
    /// Size in bytes of one block of this type, or `None` when not known.
    ///
    /// Values are hard-coded from the ggml block layouts. Every current
    /// variant returns `Some`; `None` is reserved for future types whose
    /// layout is not tabulated here.
    #[allow(clippy::match_same_arms)]
    #[must_use]
    pub const fn type_size(self) -> Option<usize> {
        match self {
            Self::I8 => Some(1),
            Self::F16 | Self::BF16 | Self::I16 => Some(2),
            Self::F32 | Self::I32 => Some(4),
            Self::F64 | Self::I64 => Some(8),
            Self::Q4_0 => Some(18),
            Self::Q4_1 => Some(20),
            Self::Q5_0 => Some(22),
            Self::Q5_1 => Some(24),
            Self::Q8_0 => Some(34),
            Self::Q8_1 => Some(36),
            Self::Q2_K => Some(84),
            Self::Q3_K => Some(110),
            Self::Q4_K => Some(144),
            Self::Q5_K => Some(176),
            Self::Q6_K => Some(210),
            Self::Q8_K => Some(292),
            Self::IQ4_NL => Some(18),
            Self::IQ4_XS => Some(136),
            Self::IQ2_XXS => Some(66),
            Self::IQ2_XS => Some(74),
            Self::IQ2_S => Some(82),
            Self::IQ3_XXS => Some(98),
            Self::IQ3_S => Some(110),
            Self::IQ1_S => Some(50),
            Self::IQ1_M => Some(56),
            Self::TQ1_0 => Some(54),
            Self::TQ2_0 => Some(66),
            Self::MXFP4 => Some(17),
        }
    }
    /// `true` for every type that is not a plain scalar (float/int) type.
    #[must_use]
    pub const fn is_quantized(self) -> bool {
        !matches!(
            self,
            Self::F32
                | Self::F16
                | Self::BF16
                | Self::F64
                | Self::I8
                | Self::I16
                | Self::I32
                | Self::I64
        )
    }
    /// Dense 0-based index used by `ParsedGguf::inspect` for its `seen`
    /// bitmap. Must stay a bijection onto `0..GGUF_TYPE_COUNT`.
    const fn inspect_index(self) -> usize {
        match self {
            Self::F32 => 0,
            Self::F16 => 1,
            Self::Q4_0 => 2,
            Self::Q4_1 => 3,
            Self::Q5_0 => 4,
            Self::Q5_1 => 5,
            Self::Q8_0 => 6,
            Self::Q8_1 => 7,
            Self::Q2_K => 8,
            Self::Q3_K => 9,
            Self::Q4_K => 10,
            Self::Q5_K => 11,
            Self::Q6_K => 12,
            Self::Q8_K => 13,
            Self::IQ2_XXS => 14,
            Self::IQ2_XS => 15,
            Self::IQ3_XXS => 16,
            Self::IQ1_S => 17,
            Self::IQ4_NL => 18,
            Self::IQ3_S => 19,
            Self::IQ2_S => 20,
            Self::IQ4_XS => 21,
            Self::I8 => 22,
            Self::I16 => 23,
            Self::I32 => 24,
            Self::I64 => 25,
            Self::F64 => 26,
            Self::IQ1_M => 27,
            Self::BF16 => 28,
            Self::TQ1_0 => 29,
            Self::TQ2_0 => 30,
            Self::MXFP4 => 31,
        }
    }
    /// Total byte size of a tensor with `n_elements` elements of this type.
    ///
    /// # Errors
    /// `Unsupported` when `type_size` is unknown; `Parse` when
    /// `n_elements` is not a whole number of blocks or the byte count
    /// overflows `u64`.
    pub fn byte_size_for_n_elements(self, n_elements: u64) -> crate::Result<u64> {
        let type_size = self
            .type_size()
            .ok_or_else(|| AnamnesisError::Unsupported {
                format: "GGUF".into(),
                detail: format!("byte size not hard-coded for ggml_type {self}"),
            })?;
        #[allow(clippy::as_conversions)]
        let block_size = self.block_size() as u64;
        #[allow(clippy::as_conversions)]
        let type_size_u64 = type_size as u64;
        if !n_elements.is_multiple_of(block_size) {
            return Err(AnamnesisError::Parse {
                reason: format!(
                    "GGUF tensor: element count {n_elements} not a multiple of block size \
                     {block_size} for type {self}"
                ),
            });
        }
        let n_blocks = n_elements / block_size;
        n_blocks
            .checked_mul(type_size_u64)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!(
                    "GGUF tensor: byte-size overflow ({n_blocks} blocks × {type_size_u64} bytes)"
                ),
            })
    }
}
impl fmt::Display for GgufType {
    /// Writes the type's canonical upper-case name (identical to the
    /// variant name); used in error messages and `inspect` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = match self {
            Self::F32 => "F32",
            Self::F16 => "F16",
            Self::BF16 => "BF16",
            Self::F64 => "F64",
            Self::I8 => "I8",
            Self::I16 => "I16",
            Self::I32 => "I32",
            Self::I64 => "I64",
            Self::Q4_0 => "Q4_0",
            Self::Q4_1 => "Q4_1",
            Self::Q5_0 => "Q5_0",
            Self::Q5_1 => "Q5_1",
            Self::Q8_0 => "Q8_0",
            Self::Q8_1 => "Q8_1",
            Self::Q2_K => "Q2_K",
            Self::Q3_K => "Q3_K",
            Self::Q4_K => "Q4_K",
            Self::Q5_K => "Q5_K",
            Self::Q6_K => "Q6_K",
            Self::Q8_K => "Q8_K",
            Self::IQ2_XXS => "IQ2_XXS",
            Self::IQ2_XS => "IQ2_XS",
            Self::IQ3_XXS => "IQ3_XXS",
            Self::IQ1_S => "IQ1_S",
            Self::IQ4_NL => "IQ4_NL",
            Self::IQ3_S => "IQ3_S",
            Self::IQ2_S => "IQ2_S",
            Self::IQ4_XS => "IQ4_XS",
            Self::IQ1_M => "IQ1_M",
            Self::TQ1_0 => "TQ1_0",
            Self::TQ2_0 => "TQ2_0",
            Self::MXFP4 => "MXFP4",
        };
        f.write_str(s)
    }
}
/// A single GGUF metadata value (one header key/value entry).
///
/// The Rust variant order does not mirror the on-disk type tags; see
/// `read_metadata_value` for the tag mapping. `Array` is boxed so the
/// enum stays at 24 bytes (enforced by the `const _` assertions below).
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum GgufMetadataValue {
    U8(u8),
    I8(i8),
    U16(u16),
    I16(i16),
    U32(u32),
    I32(i32),
    F32(f32),
    Bool(bool),
    String(String),
    Array(Box<GgufMetadataArray>),
    U64(u64),
    I64(i64),
    F64(f64),
}
/// Homogeneous array payload for `GgufMetadataValue::Array`.
///
/// Elements are stored in natively-typed `Vec`s rather than as
/// `Vec<GgufMetadataValue>`, so large numeric arrays (e.g. tokenizer
/// tables) carry no per-element enum tag.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum GgufMetadataArray {
    U8(Vec<u8>),
    I8(Vec<i8>),
    U16(Vec<u16>),
    I16(Vec<i16>),
    U32(Vec<u32>),
    I32(Vec<i32>),
    F32(Vec<f32>),
    Bool(Vec<bool>),
    String(Vec<String>),
    Array(Vec<GgufMetadataArray>),
    U64(Vec<u64>),
    I64(Vec<i64>),
    F64(Vec<f64>),
}
// Compile-time layout guards: metadata is stored per-key in a HashMap, so
// keeping these enums small bounds the per-entry memory overhead. A change
// that accidentally inlines a large variant fails the build here.
const _: () = {
    assert!(
        std::mem::size_of::<GgufMetadataValue>() == 24,
        "GgufMetadataValue must be 24 bytes (Array must be Box<GgufMetadataArray>)"
    );
    assert!(
        std::mem::size_of::<GgufMetadataArray>() == 32,
        "GgufMetadataArray must be 32 bytes (largest variant Vec<T> = 24 + 8-byte tag)"
    );
};
impl GgufMetadataValue {
    /// Borrow the inner string when this value is a `String`.
    #[must_use]
    pub fn as_string(&self) -> Option<&str> {
        match self {
            Self::String(s) => Some(s.as_str()),
            _ => None,
        }
    }
    /// Copy out the inner value when this is a `U32`.
    #[must_use]
    pub const fn as_u32(&self) -> Option<u32> {
        match self {
            Self::U32(v) => Some(*v),
            _ => None,
        }
    }
    /// Copy out the inner value when this is a `U64`.
    #[must_use]
    pub const fn as_u64(&self) -> Option<u64> {
        match self {
            Self::U64(v) => Some(*v),
            _ => None,
        }
    }
    /// Copy out the inner value when this is a `Bool`.
    #[must_use]
    pub const fn as_bool(&self) -> Option<bool> {
        match self {
            Self::Bool(v) => Some(*v),
            _ => None,
        }
    }
    /// Borrow the array payload when this value is an `Array`.
    #[must_use]
    pub fn as_array(&self) -> Option<&GgufMetadataArray> {
        match self {
            Self::Array(v) => Some(v.as_ref()),
            _ => None,
        }
    }
}
impl GgufMetadataArray {
    /// Number of elements, independent of the element type.
    #[must_use]
    pub fn len(&self) -> usize {
        match self {
            Self::U8(v) => v.len(),
            Self::I8(v) => v.len(),
            Self::U16(v) => v.len(),
            Self::I16(v) => v.len(),
            Self::U32(v) => v.len(),
            Self::I32(v) => v.len(),
            Self::F32(v) => v.len(),
            Self::Bool(v) => v.len(),
            Self::String(v) => v.len(),
            Self::Array(v) => v.len(),
            Self::U64(v) => v.len(),
            Self::I64(v) => v.len(),
            Self::F64(v) => v.len(),
        }
    }
    /// `true` when the array holds no elements.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
    /// Borrow the elements when this is a `U8` array.
    #[must_use]
    pub fn as_u8_slice(&self) -> Option<&[u8]> {
        match self {
            Self::U8(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is an `I8` array.
    #[must_use]
    pub fn as_i8_slice(&self) -> Option<&[i8]> {
        match self {
            Self::I8(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is a `U16` array.
    #[must_use]
    pub fn as_u16_slice(&self) -> Option<&[u16]> {
        match self {
            Self::U16(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is an `I16` array.
    #[must_use]
    pub fn as_i16_slice(&self) -> Option<&[i16]> {
        match self {
            Self::I16(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is a `U32` array.
    #[must_use]
    pub fn as_u32_slice(&self) -> Option<&[u32]> {
        match self {
            Self::U32(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is an `I32` array.
    #[must_use]
    pub fn as_i32_slice(&self) -> Option<&[i32]> {
        match self {
            Self::I32(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is an `F32` array.
    #[must_use]
    pub fn as_f32_slice(&self) -> Option<&[f32]> {
        match self {
            Self::F32(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is a `Bool` array.
    #[must_use]
    pub fn as_bool_slice(&self) -> Option<&[bool]> {
        match self {
            Self::Bool(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is a `String` array.
    #[must_use]
    pub fn as_string_slice(&self) -> Option<&[String]> {
        match self {
            Self::String(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the sub-arrays when this is a nested `Array`.
    #[must_use]
    pub fn as_nested_slice(&self) -> Option<&[GgufMetadataArray]> {
        match self {
            Self::Array(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is a `U64` array.
    #[must_use]
    pub fn as_u64_slice(&self) -> Option<&[u64]> {
        match self {
            Self::U64(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is an `I64` array.
    #[must_use]
    pub fn as_i64_slice(&self) -> Option<&[i64]> {
        match self {
            Self::I64(v) => Some(v.as_slice()),
            _ => None,
        }
    }
    /// Borrow the elements when this is an `F64` array.
    #[must_use]
    pub fn as_f64_slice(&self) -> Option<&[f64]> {
        match self {
            Self::F64(v) => Some(v.as_slice()),
            _ => None,
        }
    }
}
/// Parsed description of one tensor record from the GGUF header.
#[derive(Debug, Clone)]
pub struct GgufTensorInfo {
    /// Tensor name exactly as stored in the file.
    pub name: String,
    /// Dimension sizes in file order.
    pub shape: Vec<usize>,
    /// Element / quantisation type.
    pub dtype: GgufType,
    /// Byte offset of the tensor data. Stored relative to the data
    /// section on disk; `parse_gguf` rebases it to an absolute file offset.
    pub data_offset: u64,
    /// Total data size in bytes, when computable for `dtype`.
    pub byte_len: Option<u64>,
}
/// A view of one tensor's raw bytes, borrowed from a [`ParsedGguf`].
///
/// `data` is a `Cow` so callers can hold either the zero-copy slice into
/// the mmap or an owned buffer.
#[derive(Debug, Clone)]
pub struct GgufTensor<'a> {
    pub name: &'a str,
    pub shape: &'a [usize],
    pub dtype: GgufType,
    pub data: Cow<'a, [u8]>,
}
/// Summary of a parsed GGUF file, produced by [`ParsedGguf::inspect`].
#[derive(Debug, Clone)]
#[must_use]
pub struct GgufInspectInfo {
    /// GGUF header version (2 or 3).
    pub version: u32,
    /// Value of `general.architecture`, if present as a string.
    pub architecture: Option<String>,
    /// Number of tensor records in the header.
    pub tensor_count: usize,
    /// Sum of all known tensor byte lengths (saturating).
    pub total_bytes: u64,
    /// Tensors whose dtype has no tabulated byte size.
    pub unknown_size_tensors: usize,
    /// Distinct tensor dtypes, in first-seen order.
    pub dtypes: Vec<GgufType>,
    /// Tensor-data alignment in bytes.
    pub alignment: u32,
}
impl fmt::Display for GgufInspectInfo {
    /// Render a multi-line, human-readable summary of the file.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Format: GGUF v{}", self.version)?;
        if let Some(arch) = &self.architecture {
            write!(f, "\nArch: {arch}")?;
        }
        write!(
            f,
            "\nTensors: {}\nTotal size: {}",
            self.tensor_count,
            crate::inspect::format_bytes(self.total_bytes)
        )?;
        if self.unknown_size_tensors > 0 {
            write!(
                f,
                " (+{} tensors with dtype of unknown size)",
                self.unknown_size_tensors
            )?;
        }
        let names: Vec<String> = self.dtypes.iter().map(ToString::to_string).collect();
        write!(
            f,
            "\nDtypes: {}\nAlignment: {} bytes",
            names.join(", "),
            self.alignment
        )
    }
}
/// A parsed GGUF file: decoded header plus the mmap that tensor slices
/// borrow from.
#[derive(Debug)]
pub struct ParsedGguf {
    // Keeps the file mapped for the lifetime of this value; `tensors()`
    // and `dequantize_tensor()` read directly from this mapping.
    mmap: memmap2::Mmap,
    // Header version (2 or 3).
    version: u32,
    // Tensor-data alignment in bytes (`general.alignment` or the default).
    alignment: u32,
    metadata: HashMap<String, GgufMetadataValue>,
    // Tensor records with absolute (already rebased) data offsets.
    tensor_infos: Vec<GgufTensorInfo>,
}
impl ParsedGguf {
    /// GGUF header version (2 or 3).
    #[must_use]
    pub const fn version(&self) -> u32 {
        self.version
    }
    /// Tensor-data alignment in bytes.
    #[must_use]
    pub const fn alignment(&self) -> u32 {
        self.alignment
    }
    /// Number of tensor records in the header.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.tensor_infos.len()
    }
    /// `true` when the file declares no tensors.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.tensor_infos.is_empty()
    }
    /// All header key/value metadata.
    #[must_use]
    pub const fn metadata(&self) -> &HashMap<String, GgufMetadataValue> {
        &self.metadata
    }
    /// Per-tensor descriptions, in file order.
    #[must_use]
    pub fn tensor_info(&self) -> &[GgufTensorInfo] {
        &self.tensor_infos
    }
    /// Iterate over tensors as zero-copy slices into the mmap.
    ///
    /// Tensors are silently skipped (not reported as errors) when their
    /// byte length is unknown or their data range does not fit inside the
    /// mapped file; use `dequantize_tensor` for per-tensor diagnostics.
    pub fn tensors(&self) -> impl Iterator<Item = GgufTensor<'_>> + '_ {
        self.tensor_infos.iter().filter_map(|info| {
            let byte_len_u64 = info.byte_len?;
            let start = usize::try_from(info.data_offset).ok()?;
            let byte_len = usize::try_from(byte_len_u64).ok()?;
            let end = start.checked_add(byte_len)?;
            let slice = self.mmap.get(start..end)?;
            Some(GgufTensor {
                name: info.name.as_str(),
                shape: info.shape.as_slice(),
                dtype: info.dtype,
                data: Cow::Borrowed(slice),
            })
        })
    }
    /// Summarise the file: saturating total of known tensor bytes, count
    /// of unknown-size tensors, distinct dtypes in first-seen order, and
    /// the `general.architecture` string if present.
    pub fn inspect(&self) -> GgufInspectInfo {
        let mut total_bytes: u64 = 0;
        let mut unknown_size_tensors: usize = 0;
        // Bitmap over `GgufType::inspect_index()` so dtype dedup is O(1)
        // per tensor without hashing.
        let mut seen = [false; GGUF_TYPE_COUNT];
        let mut dtypes: Vec<GgufType> = Vec::new();
        for info in &self.tensor_infos {
            if let Some(byte_len) = info.byte_len {
                total_bytes = total_bytes.saturating_add(byte_len);
            } else {
                unknown_size_tensors = unknown_size_tensors.saturating_add(1);
            }
            // `inspect_index` is always < GGUF_TYPE_COUNT by construction.
            let idx = info.dtype.inspect_index();
            #[allow(clippy::indexing_slicing)]
            if !seen[idx] {
                #[allow(clippy::indexing_slicing)]
                {
                    seen[idx] = true;
                }
                dtypes.push(info.dtype);
            }
        }
        let architecture = self
            .metadata
            .get("general.architecture")
            .and_then(GgufMetadataValue::as_string)
            .map(str::to_owned);
        GgufInspectInfo {
            version: self.version,
            architecture,
            tensor_count: self.tensor_infos.len(),
            total_bytes,
            unknown_size_tensors,
            dtypes,
            alignment: self.alignment,
        }
    }
    /// Dequantise one tensor's raw bytes to BF16 via the `remember::gguf`
    /// kernel.
    ///
    /// # Errors
    /// `Unsupported` when the dtype's byte size is unknown; `Parse` when
    /// the tensor's offset/length overflow `usize` or fall outside the
    /// mapped file, or when the element count overflows.
    pub fn dequantize_tensor(&self, info: &GgufTensorInfo) -> crate::Result<Vec<u8>> {
        let byte_len_u64 = info.byte_len.ok_or_else(|| AnamnesisError::Unsupported {
            format: "GGUF".into(),
            detail: format!(
                "byte size not known for dtype {} — dequantisation not yet supported",
                info.dtype
            ),
        })?;
        let start = usize::try_from(info.data_offset).map_err(|_| AnamnesisError::Parse {
            reason: format!(
                "tensor `{}`: data_offset {} exceeds usize",
                info.name, info.data_offset
            ),
        })?;
        let byte_len = usize::try_from(byte_len_u64).map_err(|_| AnamnesisError::Parse {
            reason: format!(
                "tensor `{}`: byte_len {byte_len_u64} exceeds usize",
                info.name
            ),
        })?;
        let end = start
            .checked_add(byte_len)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!(
                    "tensor `{}`: data_offset + byte_len overflows usize",
                    info.name
                ),
            })?;
        let data = self
            .mmap
            .get(start..end)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!(
                    "tensor `{}`: byte range {start}..{end} exceeds mmap length {}",
                    info.name,
                    self.mmap.len()
                ),
            })?;
        // Recompute the element count from the shape; the dequantiser
        // needs it to size its output.
        let n_elements: usize = info
            .shape
            .iter()
            .try_fold(1usize, |acc, &d| acc.checked_mul(d))
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!("tensor `{}`: element count overflows usize", info.name),
            })?;
        crate::remember::gguf::dequantize_gguf_to_bf16(data, info.dtype, n_elements)
    }
}
/// A bounds-checked little-endian reader over the mmapped file bytes.
struct Cursor<'a> {
    buf: &'a [u8],
    // Byte position of the next read; only advances on a successful read.
    pos: usize,
}
impl<'a> Cursor<'a> {
    const fn new(buf: &'a [u8]) -> Self {
        Self { buf, pos: 0 }
    }
    /// Borrow the next `n` bytes and advance the cursor.
    ///
    /// # Errors
    /// `Parse` on position overflow or when fewer than `n` bytes remain;
    /// the cursor does not advance on failure.
    fn read_bytes(&mut self, n: usize) -> crate::Result<&'a [u8]> {
        let end = self
            .pos
            .checked_add(n)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!("GGUF: cursor overflow at pos {} + {n}", self.pos),
            })?;
        let slice = self
            .buf
            .get(self.pos..end)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!(
                    "GGUF: unexpected EOF at pos {} (wanted {n} bytes, have {})",
                    self.pos,
                    self.buf.len().saturating_sub(self.pos)
                ),
            })?;
        self.pos = end;
        Ok(slice)
    }
    /// Read exactly `N` bytes into a fixed-size array. Shared helper for
    /// all the fixed-width little-endian readers below, which otherwise
    /// each duplicated the same copy-into-array boilerplate.
    fn read_array<const N: usize>(&mut self) -> crate::Result<[u8; N]> {
        let bytes = self.read_bytes(N)?;
        let mut arr = [0u8; N];
        arr.copy_from_slice(bytes);
        Ok(arr)
    }
    fn read_u8(&mut self) -> crate::Result<u8> {
        let [b] = self.read_array::<1>()?;
        Ok(b)
    }
    fn read_i8(&mut self) -> crate::Result<i8> {
        Ok(i8::from_le_bytes(self.read_array()?))
    }
    fn read_u16_le(&mut self) -> crate::Result<u16> {
        Ok(u16::from_le_bytes(self.read_array()?))
    }
    fn read_i16_le(&mut self) -> crate::Result<i16> {
        Ok(i16::from_le_bytes(self.read_array()?))
    }
    fn read_u32_le(&mut self) -> crate::Result<u32> {
        Ok(u32::from_le_bytes(self.read_array()?))
    }
    fn read_i32_le(&mut self) -> crate::Result<i32> {
        Ok(i32::from_le_bytes(self.read_array()?))
    }
    fn read_u64_le(&mut self) -> crate::Result<u64> {
        Ok(u64::from_le_bytes(self.read_array()?))
    }
    fn read_i64_le(&mut self) -> crate::Result<i64> {
        Ok(i64::from_le_bytes(self.read_array()?))
    }
    fn read_f32_le(&mut self) -> crate::Result<f32> {
        Ok(f32::from_le_bytes(self.read_array()?))
    }
    fn read_f64_le(&mut self) -> crate::Result<f64> {
        Ok(f64::from_le_bytes(self.read_array()?))
    }
    /// Read a strict GGUF bool: exactly one byte, 0 or 1.
    fn read_bool(&mut self) -> crate::Result<bool> {
        let b = self.read_u8()?;
        match b {
            0 => Ok(false),
            1 => Ok(true),
            other => Err(AnamnesisError::Parse {
                reason: format!("GGUF metadata: invalid bool byte {other} (expected 0 or 1)"),
            }),
        }
    }
    /// Read a length-prefixed UTF-8 string (u64 little-endian length).
    ///
    /// `max_len` caps the *declared* length before any bytes are read, so
    /// a hostile header cannot force a huge allocation.
    ///
    /// # Errors
    /// `Parse` when the length exceeds `max_len` or `usize`, when the
    /// buffer is too short, or when the bytes are not valid UTF-8.
    fn read_string(&mut self, max_len: u64) -> crate::Result<String> {
        let len = self.read_u64_le()?;
        if len > max_len {
            return Err(AnamnesisError::Parse {
                // Report the cap actually in force (it varies per call
                // site); the old message hard-coded a stale value.
                reason: format!("GGUF: string length {len} exceeds cap {max_len}"),
            });
        }
        let len_usz = usize::try_from(len).map_err(|_| AnamnesisError::Parse {
            reason: format!("GGUF: string length {len} overflows usize"),
        })?;
        let bytes = self.read_bytes(len_usz)?;
        let valid = std::str::from_utf8(bytes).map_err(|e| AnamnesisError::Parse {
            reason: format!("GGUF: string is not valid UTF-8: {e}"),
        })?;
        Ok(valid.to_owned())
    }
}
/// Decode a single metadata value according to its on-disk type tag.
fn read_metadata_value(
    cursor: &mut Cursor<'_>,
    value_type: u32,
) -> crate::Result<GgufMetadataValue> {
    let value = match value_type {
        0 => GgufMetadataValue::U8(cursor.read_u8()?),
        1 => GgufMetadataValue::I8(cursor.read_i8()?),
        2 => GgufMetadataValue::U16(cursor.read_u16_le()?),
        3 => GgufMetadataValue::I16(cursor.read_i16_le()?),
        4 => GgufMetadataValue::U32(cursor.read_u32_le()?),
        5 => GgufMetadataValue::I32(cursor.read_i32_le()?),
        6 => GgufMetadataValue::F32(cursor.read_f32_le()?),
        7 => GgufMetadataValue::Bool(cursor.read_bool()?),
        8 => GgufMetadataValue::String(cursor.read_string(MAX_STRING_LEN)?),
        9 => {
            // Arrays carry their own element-type tag and length prefix.
            let inner_type = cursor.read_u32_le()?;
            let len = read_array_len(cursor)?;
            let array = read_typed_array(cursor, inner_type, len, 0)?;
            GgufMetadataValue::Array(Box::new(array))
        }
        10 => GgufMetadataValue::U64(cursor.read_u64_le()?),
        11 => GgufMetadataValue::I64(cursor.read_i64_le()?),
        12 => GgufMetadataValue::F64(cursor.read_f64_le()?),
        other => {
            return Err(AnamnesisError::Parse {
                reason: format!("GGUF metadata: unknown value type {other}"),
            });
        }
    };
    Ok(value)
}
/// Read a u64 array-length prefix, enforce the global cap, and convert
/// it to `usize`.
fn read_array_len(cursor: &mut Cursor<'_>) -> crate::Result<usize> {
    let len = cursor.read_u64_le()?;
    if len <= MAX_ARRAY_LEN {
        usize::try_from(len).map_err(|_| AnamnesisError::Parse {
            reason: format!("GGUF metadata: array length {len} overflows usize"),
        })
    } else {
        Err(AnamnesisError::Parse {
            reason: format!("GGUF metadata: array length {len} exceeds cap {MAX_ARRAY_LEN}"),
        })
    }
}
fn read_typed_array(
cursor: &mut Cursor<'_>,
inner_type: u32,
len: usize,
depth: u32,
) -> crate::Result<GgufMetadataArray> {
let cap = len.min(PREALLOC_SOFT_CAP);
match inner_type {
0 => {
let mut v: Vec<u8> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_u8()?);
}
Ok(GgufMetadataArray::U8(v))
}
1 => {
let mut v: Vec<i8> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_i8()?);
}
Ok(GgufMetadataArray::I8(v))
}
2 => {
let mut v: Vec<u16> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_u16_le()?);
}
Ok(GgufMetadataArray::U16(v))
}
3 => {
let mut v: Vec<i16> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_i16_le()?);
}
Ok(GgufMetadataArray::I16(v))
}
4 => {
let mut v: Vec<u32> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_u32_le()?);
}
Ok(GgufMetadataArray::U32(v))
}
5 => {
let mut v: Vec<i32> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_i32_le()?);
}
Ok(GgufMetadataArray::I32(v))
}
6 => {
let mut v: Vec<f32> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_f32_le()?);
}
Ok(GgufMetadataArray::F32(v))
}
7 => {
let mut v: Vec<bool> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_bool()?);
}
Ok(GgufMetadataArray::Bool(v))
}
8 => {
let mut v: Vec<String> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_string(MAX_STRING_LEN)?);
}
Ok(GgufMetadataArray::String(v))
}
9 => {
if depth >= MAX_ARRAY_DEPTH {
return Err(AnamnesisError::Parse {
reason: format!(
"GGUF metadata: array nesting exceeds depth cap {MAX_ARRAY_DEPTH}"
),
});
}
let mut v: Vec<GgufMetadataArray> = Vec::with_capacity(cap);
for _ in 0..len {
let sub_inner = cursor.read_u32_le()?;
let sub_len = read_array_len(cursor)?;
v.push(read_typed_array(cursor, sub_inner, sub_len, depth + 1)?);
}
Ok(GgufMetadataArray::Array(v))
}
10 => {
let mut v: Vec<u64> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_u64_le()?);
}
Ok(GgufMetadataArray::U64(v))
}
11 => {
let mut v: Vec<i64> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_i64_le()?);
}
Ok(GgufMetadataArray::I64(v))
}
12 => {
let mut v: Vec<f64> = Vec::with_capacity(cap);
for _ in 0..len {
v.push(cursor.read_f64_le()?);
}
Ok(GgufMetadataArray::F64(v))
}
other => Err(AnamnesisError::Parse {
reason: format!("GGUF metadata: unknown array inner type {other}"),
}),
}
}
/// Parse a little-endian GGUF v2/v3 file via mmap.
///
/// Validates the magic, version, metadata, and every tensor record, then
/// rebases tensor offsets from data-section-relative to absolute file
/// offsets before returning.
///
/// # Errors
/// `Io` on open/mmap failure; `Unsupported` for big-endian files, legacy
/// pre-GGUF formats, GGUF v1, or unknown versions; `Parse` for any
/// malformed or out-of-bounds header field.
#[allow(unsafe_code)]
pub fn parse_gguf(path: impl AsRef<Path>) -> crate::Result<ParsedGguf> {
    let file = std::fs::File::open(path.as_ref())?;
    // SAFETY: memory-mapping a file is unsound if another process resizes
    // or mutates it while mapped; we accept that standard mmap caveat here
    // and all subsequent reads are bounds-checked against the mapping.
    let raw =
        unsafe { memmap2::MmapOptions::new().populate().map(&file) }.map_err(AnamnesisError::Io)?;
    // --- magic ---------------------------------------------------------
    let magic_bytes = raw.get(..4).ok_or_else(|| AnamnesisError::Parse {
        reason: "GGUF: file shorter than 4 bytes (no magic)".into(),
    })?;
    if magic_bytes != GGUF_MAGIC {
        let mut arr = [0u8; 4];
        arr.copy_from_slice(magic_bytes);
        let as_le = u32::from_le_bytes(arr);
        // Byte-swapped magic means the file was written big-endian.
        if as_le == GGUF_MAGIC_BE_U32 {
            return Err(AnamnesisError::Unsupported {
                format: "GGUF".into(),
                detail: "big-endian GGUF files are not yet supported".into(),
            });
        }
        // Recognise the pre-GGUF ggml container magics for a better error.
        let legacy_name: Option<&'static str> = match magic_bytes {
            b"GGML" => Some("GGML"),
            b"GGJT" => Some("GGJT"),
            b"GGMF" => Some("GGMF"),
            _ => None,
        };
        if let Some(name) = legacy_name {
            return Err(AnamnesisError::Unsupported {
                format: "GGUF".into(),
                detail: format!(
                    "legacy `{name}` format predates GGUF; re-convert with `llama.cpp` to GGUF"
                ),
            });
        }
        return Err(AnamnesisError::Parse {
            reason: format!(
                "GGUF: invalid magic (expected `GGUF`/{GGUF_MAGIC_LE_U32:#010x}, got {as_le:#010x})"
            ),
        });
    }
    // --- header: version + counts --------------------------------------
    let mut cursor = Cursor::new(&raw);
    cursor.pos = 4;
    let version = cursor.read_u32_le()?;
    if version == 1 {
        return Err(AnamnesisError::Unsupported {
            format: "GGUF".into(),
            detail: "GGUF v1 uses u32 string/array lengths and is not supported; \
                     re-save with a modern `llama.cpp` to produce v2 or v3"
                .into(),
        });
    }
    if version != 2 && version != 3 {
        return Err(AnamnesisError::Unsupported {
            format: "GGUF".into(),
            detail: format!("unsupported GGUF version {version} (expected 2 or 3)"),
        });
    }
    let tensor_count = cursor.read_u64_le()?;
    let kv_count = cursor.read_u64_le()?;
    if tensor_count > MAX_TENSOR_COUNT {
        return Err(AnamnesisError::Parse {
            reason: format!("GGUF: tensor count {tensor_count} exceeds cap {MAX_TENSOR_COUNT}"),
        });
    }
    if kv_count > MAX_KV_COUNT {
        return Err(AnamnesisError::Parse {
            reason: format!("GGUF: metadata kv count {kv_count} exceeds cap {MAX_KV_COUNT}"),
        });
    }
    let tensor_count_usz = usize::try_from(tensor_count).map_err(|_| AnamnesisError::Parse {
        reason: format!("GGUF: tensor count {tensor_count} overflows usize"),
    })?;
    let kv_count_usz = usize::try_from(kv_count).map_err(|_| AnamnesisError::Parse {
        reason: format!("GGUF: metadata kv count {kv_count} overflows usize"),
    })?;
    // --- metadata key/value section -------------------------------------
    let mut metadata: HashMap<String, GgufMetadataValue> =
        HashMap::with_capacity(kv_count_usz.min(PREALLOC_SOFT_CAP));
    for _ in 0..kv_count_usz {
        // Keys get a tighter length cap than string values.
        let key = cursor.read_string(u64::from(u16::MAX))?;
        let value_type = cursor.read_u32_le()?;
        let value = read_metadata_value(&mut cursor, value_type)?;
        metadata.insert(key, value);
    }
    // `general.alignment`, when present, must be a non-zero UINT32.
    let alignment = match metadata.get("general.alignment") {
        Some(GgufMetadataValue::U32(v)) if *v != 0 => *v,
        Some(GgufMetadataValue::U32(_)) => {
            return Err(AnamnesisError::Parse {
                reason: "GGUF: general.alignment is zero".into(),
            });
        }
        Some(other) => {
            return Err(AnamnesisError::Parse {
                reason: format!(
                    "GGUF: general.alignment has wrong type (expected UINT32, got {})",
                    metadata_type_name(other)
                ),
            });
        }
        None => DEFAULT_ALIGNMENT,
    };
    let alignment_u64 = u64::from(alignment);
    // --- tensor-info section ---------------------------------------------
    let mut tensor_infos: Vec<GgufTensorInfo> =
        Vec::with_capacity(tensor_count_usz.min(PREALLOC_SOFT_CAP));
    for _ in 0..tensor_count_usz {
        tensor_infos.push(read_tensor_info_relative(&mut cursor)?);
    }
    #[allow(clippy::as_conversions)]
    let tensor_info_end = cursor.pos as u64;
    #[allow(clippy::as_conversions)]
    let file_len_u64 = raw.len() as u64;
    // The tensor data section begins at the first aligned offset after the
    // tensor-info records (no padding is required for a tensor-less file).
    let data_section_start = if tensor_infos.is_empty() {
        tensor_info_end
    } else {
        let start = align_up(tensor_info_end, alignment_u64)?;
        if start > file_len_u64 {
            return Err(AnamnesisError::Parse {
                reason: format!(
                    "GGUF: tensor data section start {start} exceeds file size {file_len_u64}"
                ),
            });
        }
        start
    };
    // --- rebase relative offsets to absolute and bounds-check -------------
    for info in &mut tensor_infos {
        let relative_offset = info.data_offset;
        if relative_offset % alignment_u64 != 0 {
            return Err(AnamnesisError::Parse {
                reason: format!(
                    "GGUF tensor `{}`: relative offset {relative_offset} is not a multiple of alignment {alignment_u64}",
                    info.name
                ),
            });
        }
        let absolute = data_section_start
            .checked_add(relative_offset)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!(
                    "GGUF tensor `{}`: absolute offset overflow ({} + {})",
                    info.name, data_section_start, relative_offset
                ),
            })?;
        if absolute > file_len_u64 {
            return Err(AnamnesisError::Parse {
                reason: format!(
                    "GGUF tensor `{}`: data_offset {absolute} exceeds file size {file_len_u64}",
                    info.name
                ),
            });
        }
        // When the byte length is known, the full data range must also fit.
        if let Some(len) = info.byte_len {
            let end = absolute
                .checked_add(len)
                .ok_or_else(|| AnamnesisError::Parse {
                    reason: format!("GGUF tensor `{}`: end offset overflow", info.name),
                })?;
            if end > file_len_u64 {
                return Err(AnamnesisError::Parse {
                    reason: format!(
                        "GGUF tensor `{}`: data range [{absolute}..{end}] exceeds file size {file_len_u64}",
                        info.name
                    ),
                });
            }
        }
        info.data_offset = absolute;
    }
    Ok(ParsedGguf {
        mmap: raw,
        version,
        alignment,
        metadata,
        tensor_infos,
    })
}
/// Read one tensor-info record from the header.
///
/// The returned `data_offset` is still *relative* to the data section;
/// `parse_gguf` rebases it to an absolute file offset afterwards.
fn read_tensor_info_relative(cursor: &mut Cursor<'_>) -> crate::Result<GgufTensorInfo> {
    let name = cursor.read_string(MAX_TENSOR_NAME_LEN)?;
    let n_dims = cursor.read_u32_le()?;
    if n_dims == 0 {
        return Err(AnamnesisError::Parse {
            reason: format!("GGUF tensor `{name}`: n_dimensions is zero"),
        });
    }
    if n_dims > MAX_TENSOR_DIMS {
        return Err(AnamnesisError::Parse {
            reason: format!(
                "GGUF tensor `{name}`: n_dimensions {n_dims} exceeds cap {MAX_TENSOR_DIMS}"
            ),
        });
    }
    let n_dims_usz = usize::try_from(n_dims).map_err(|_| AnamnesisError::Parse {
        reason: format!("GGUF tensor `{name}`: n_dimensions {n_dims} overflows usize"),
    })?;
    let mut shape_usz: Vec<usize> = Vec::with_capacity(n_dims_usz);
    // Running element count; overflow and the global cap are checked after
    // every dimension so a hostile shape is rejected early.
    let mut n_elements: u64 = 1;
    for _ in 0..n_dims {
        let d = cursor.read_u64_le()?;
        if d == 0 {
            return Err(AnamnesisError::Parse {
                reason: format!("GGUF tensor `{name}`: zero-sized dimension"),
            });
        }
        n_elements = n_elements
            .checked_mul(d)
            .ok_or_else(|| AnamnesisError::Parse {
                reason: format!("GGUF tensor `{name}`: element count overflow"),
            })?;
        if n_elements > MAX_TENSOR_ELEMENTS {
            return Err(AnamnesisError::Parse {
                reason: format!(
                    "GGUF tensor `{name}`: element count {n_elements} exceeds cap {MAX_TENSOR_ELEMENTS}"
                ),
            });
        }
        let d_usz = usize::try_from(d).map_err(|_| AnamnesisError::Parse {
            reason: format!("GGUF tensor `{name}`: dimension {d} overflows usize"),
        })?;
        shape_usz.push(d_usz);
    }
    let dtype = GgufType::from_u32(cursor.read_u32_le()?)?;
    let relative_offset = cursor.read_u64_le()?;
    // Compute the byte length eagerly when the dtype's block size is
    // tabulated; `None` marks dtypes whose size we cannot determine.
    let byte_len = if dtype.type_size().is_some() {
        Some(dtype.byte_size_for_n_elements(n_elements)?)
    } else {
        None
    };
    Ok(GgufTensorInfo {
        name,
        shape: shape_usz,
        dtype,
        data_offset: relative_offset,
        byte_len,
    })
}
/// Human-readable GGUF type-tag name for a metadata value; used only in
/// error messages (e.g. the `general.alignment` type check).
const fn metadata_type_name(value: &GgufMetadataValue) -> &'static str {
    match value {
        GgufMetadataValue::U8(_) => "UINT8",
        GgufMetadataValue::I8(_) => "INT8",
        GgufMetadataValue::U16(_) => "UINT16",
        GgufMetadataValue::I16(_) => "INT16",
        GgufMetadataValue::U32(_) => "UINT32",
        GgufMetadataValue::I32(_) => "INT32",
        GgufMetadataValue::F32(_) => "FLOAT32",
        GgufMetadataValue::Bool(_) => "BOOL",
        GgufMetadataValue::String(_) => "STRING",
        GgufMetadataValue::Array(_) => "ARRAY",
        GgufMetadataValue::U64(_) => "UINT64",
        GgufMetadataValue::I64(_) => "INT64",
        GgufMetadataValue::F64(_) => "FLOAT64",
    }
}
/// Round `offset` up to the next multiple of `alignment`.
///
/// # Errors
/// `Parse` when `alignment` is zero or the rounded value overflows `u64`.
fn align_up(offset: u64, alignment: u64) -> crate::Result<u64> {
    if alignment == 0 {
        return Err(AnamnesisError::Parse {
            reason: "GGUF: general.alignment must be non-zero".into(),
        });
    }
    // ceil(offset / alignment) * alignment; div_ceil cannot overflow, so
    // the only overflow point is the final multiplication.
    offset
        .div_ceil(alignment)
        .checked_mul(alignment)
        .ok_or_else(|| AnamnesisError::Parse {
            reason: format!(
                "GGUF: alignment padding overflow (offset {offset}, alignment {alignment})"
            ),
        })
}
#[cfg(test)]
#[allow(
clippy::indexing_slicing,
clippy::as_conversions,
clippy::cast_possible_truncation,
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::float_cmp,
clippy::wildcard_enum_match_arm,
clippy::manual_is_multiple_of
)]
mod tests {
use super::*;
use std::io::Write;
struct GgufBuilder {
buf: Vec<u8>,
}
impl GgufBuilder {
fn new() -> Self {
Self { buf: Vec::new() }
}
fn push_bytes(&mut self, bytes: &[u8]) {
self.buf.extend_from_slice(bytes);
}
fn push_u32(&mut self, v: u32) {
self.buf.extend_from_slice(&v.to_le_bytes());
}
fn push_u64(&mut self, v: u64) {
self.buf.extend_from_slice(&v.to_le_bytes());
}
fn push_string(&mut self, s: &str) {
self.push_u64(s.len() as u64);
self.buf.extend_from_slice(s.as_bytes());
}
fn push_kv_uint32(&mut self, key: &str, value: u32) {
self.push_string(key);
self.push_u32(4); self.push_u32(value);
}
fn push_kv_string(&mut self, key: &str, value: &str) {
self.push_string(key);
self.push_u32(8); self.push_string(value);
}
fn push_kv_f32_array(&mut self, key: &str, values: &[f32]) {
self.push_string(key);
self.push_u32(9); self.push_u32(6); self.push_u64(values.len() as u64);
for v in values {
self.buf.extend_from_slice(&v.to_le_bytes());
}
}
fn push_tensor_info(
&mut self,
name: &str,
shape: &[u64],
dtype_disc: u32,
relative_offset: u64,
) {
self.push_string(name);
self.push_u32(u32::try_from(shape.len()).expect("shape len fits in u32 for tests"));
for &d in shape {
self.push_u64(d);
}
self.push_u32(dtype_disc);
self.push_u64(relative_offset);
}
fn pad_to_alignment(&mut self, alignment: usize) {
while self.buf.len() % alignment != 0 {
self.buf.push(0);
}
}
fn finish(self) -> Vec<u8> {
self.buf
}
}
fn build_minimal_gguf() -> Vec<u8> {
let mut b = GgufBuilder::new();
b.push_bytes(b"GGUF");
b.push_u32(3); b.push_u64(2); b.push_u64(3);
b.push_kv_string("general.architecture", "test");
b.push_kv_uint32("general.alignment", 32);
b.push_kv_f32_array("test.values", &[1.0, 2.0, 3.0]);
b.push_tensor_info("tensor.a", &[2, 3], 0, 0);
b.push_tensor_info("tensor.b", &[64], 2, 32);
b.pad_to_alignment(32);
b.push_bytes(&[0u8; 24]);
b.pad_to_alignment(32);
b.push_bytes(&[0u8; 36]);
b.finish()
}
fn write_temp_gguf(bytes: &[u8]) -> tempfile::NamedTempFile {
let mut f = tempfile::NamedTempFile::new().unwrap();
f.write_all(bytes).unwrap();
f.flush().unwrap();
f
}
#[test]
fn parse_minimal_gguf_succeeds() {
let bytes = build_minimal_gguf();
let tmp = write_temp_gguf(&bytes);
let parsed = parse_gguf(tmp.path()).unwrap();
assert_eq!(parsed.version(), 3);
assert_eq!(parsed.alignment(), 32);
assert_eq!(parsed.len(), 2);
assert!(!parsed.is_empty());
let infos = parsed.tensor_info();
assert_eq!(infos[0].name, "tensor.a");
assert_eq!(infos[0].shape, vec![2, 3]);
assert_eq!(infos[0].dtype, GgufType::F32);
assert_eq!(infos[0].byte_len, Some(24));
assert_eq!(infos[1].name, "tensor.b");
assert_eq!(infos[1].shape, vec![64]);
assert_eq!(infos[1].dtype, GgufType::Q4_0);
assert_eq!(infos[1].byte_len, Some(36));
let metadata = parsed.metadata();
assert_eq!(
metadata
.get("general.architecture")
.and_then(|v| v.as_string()),
Some("test")
);
assert_eq!(
metadata
.get("general.alignment")
.and_then(GgufMetadataValue::as_u32),
Some(32)
);
let arr = metadata
.get("test.values")
.and_then(GgufMetadataValue::as_array)
.unwrap();
assert_eq!(arr.len(), 3);
let f32s = arr
.as_f32_slice()
.expect("test.values should be an F32 array");
assert_eq!(f32s, &[1.0f32, 2.0, 3.0]);
}
#[test]
fn tensors_returns_zero_copy_borrowed_slices() {
    let tmp = write_temp_gguf(&build_minimal_gguf());
    let parsed = parse_gguf(tmp.path()).unwrap();
    let tensors: Vec<_> = parsed.tensors().collect();
    assert_eq!(tensors.len(), 2);
    // Every tensor's data must borrow from the parsed buffer — an Owned
    // variant would mean a hidden copy.
    assert!(tensors
        .iter()
        .all(|t| matches!(t.data, Cow::Borrowed(_))));
    assert_eq!(tensors[0].name, "tensor.a");
    assert_eq!(tensors[0].shape, &[2_usize, 3]);
    assert_eq!(tensors[0].data.len(), 24);
    assert_eq!(tensors[1].name, "tensor.b");
    assert_eq!(tensors[1].shape, &[64_usize]);
    assert_eq!(tensors[1].data.len(), 36);
}
#[test]
fn inspect_info_reports_expected_fields() {
    let tmp = write_temp_gguf(&build_minimal_gguf());
    let parsed = parse_gguf(tmp.path()).unwrap();
    let info = parsed.inspect();
    assert_eq!(info.version, 3);
    assert_eq!(info.architecture.as_deref(), Some("test"));
    assert_eq!(info.tensor_count, 2);
    assert_eq!(info.total_bytes, 24 + 36);
    assert_eq!(info.unknown_size_tensors, 0);
    assert_eq!(info.alignment, 32);
    assert_eq!(info.dtypes, vec![GgufType::F32, GgufType::Q4_0]);
    // The Display rendering should surface each headline fact.
    let rendered = info.to_string();
    for needle in [
        "GGUF v3",
        "Arch: test",
        "Tensors: 2",
        "Dtypes: F32, Q4_0",
        "Alignment: 32 bytes",
    ] {
        assert!(rendered.contains(needle), "missing {needle:?} in: {rendered}");
    }
}
#[test]
fn typed_array_f32_uses_native_storage() {
    let expected = [1.5f32, -2.25, 0.0, 3.5, 7.125];
    // File with zero tensors and a single F32-array KV.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(0);
    b.push_u64(1);
    b.push_kv_f32_array("logits", &expected);
    let tmp = write_temp_gguf(&b.finish());
    let parsed = parse_gguf(tmp.path()).unwrap();
    let arr = parsed
        .metadata()
        .get("logits")
        .and_then(GgufMetadataValue::as_array)
        .unwrap();
    // F32 arrays should land in the dedicated typed variant, not a
    // per-element boxed representation.
    assert!(matches!(arr, GgufMetadataArray::F32(_)));
    assert_eq!(arr.len(), 5);
    assert!(!arr.is_empty());
    assert_eq!(arr.as_f32_slice().unwrap(), &expected[..]);
}
#[test]
fn metadata_value_size_is_bounded() {
    // Guard against accidental growth of the metadata enums: a larger
    // variant would bloat every parsed KV entry.
    use std::mem::size_of;
    assert_eq!(size_of::<GgufMetadataValue>(), 24);
    assert_eq!(size_of::<GgufMetadataArray>(), 32);
}
#[test]
fn parse_header_only_file_is_accepted() {
    // A file that is nothing but the 24-byte header (magic + version +
    // zero tensors + zero KVs) is still valid GGUF.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(0);
    b.push_u64(0);
    let bytes = b.finish();
    assert_eq!(bytes.len(), 24);
    let tmp = write_temp_gguf(&bytes);
    let parsed = parse_gguf(tmp.path()).unwrap();
    assert_eq!(parsed.version(), 3);
    assert_eq!(parsed.alignment(), 32);
    assert_eq!(parsed.len(), 0);
    assert!(parsed.is_empty());
    assert!(parsed.metadata().is_empty());
    assert!(parsed.tensor_info().is_empty());
    assert_eq!(parsed.tensors().count(), 0);
}
#[test]
fn alignment_defaults_to_32_when_metadata_absent() {
    // No "general.alignment" KV is written, so the parser must fall back
    // to the default of 32.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(1);
    b.push_u64(1);
    b.push_kv_string("general.architecture", "test");
    b.push_tensor_info("x", &[1], 0, 0);
    b.pad_to_alignment(32);
    b.push_bytes(&[0u8; 4]);
    let tmp = write_temp_gguf(&b.finish());
    assert_eq!(parse_gguf(tmp.path()).unwrap().alignment(), 32);
}
#[test]
fn reject_file_too_small() {
    // Three bytes cannot even hold the magic, let alone a full header.
    let tmp = write_temp_gguf(b"GGU");
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Parse { .. }
    ));
}
#[test]
fn reject_bad_magic() {
    // Correct length, wrong magic bytes.
    let tmp = write_temp_gguf(b"XXXX\x00\x00\x00\x00");
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Parse { .. }
    ));
}
#[test]
fn reject_legacy_ggml_magic() {
    // A pre-GGUF "GGML" file should be rejected with a targeted
    // Unsupported error that names the legacy format, not a generic
    // parse failure.
    let mut bytes = b"GGML".to_vec();
    bytes.resize(bytes.len() + 100, 0);
    let tmp = write_temp_gguf(&bytes);
    match parse_gguf(tmp.path()).unwrap_err() {
        AnamnesisError::Unsupported { format, detail } => {
            assert_eq!(format, "GGUF");
            assert!(detail.contains("GGML"));
        }
        other => panic!("expected Unsupported, got {other:?}"),
    }
}
#[test]
fn reject_v1() {
    // Version 1 predates the current layout and is unsupported.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(1);
    b.push_u64(0);
    b.push_u64(0);
    let tmp = write_temp_gguf(&b.finish());
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Unsupported { .. }
    ));
}
#[test]
fn reject_truncated_file() {
    // Chop 20 bytes off the tail so the last tensor's data runs past EOF.
    let bytes = build_minimal_gguf();
    let tmp = write_temp_gguf(&bytes[..bytes.len() - 20]);
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Parse { .. }
    ));
}
#[test]
fn reject_tensor_data_out_of_bounds() {
    // Tensor claims 1000 F32 elements (4000 bytes) but only 32 bytes of
    // data follow the header.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(1);
    b.push_u64(0);
    b.push_tensor_info("huge", &[1000], 0, 0);
    b.pad_to_alignment(32);
    b.push_bytes(&[0u8; 32]);
    let tmp = write_temp_gguf(&b.finish());
    match parse_gguf(tmp.path()).unwrap_err() {
        AnamnesisError::Parse { reason } => {
            assert!(reason.contains("exceeds file size"), "got: {reason}");
        }
        other => panic!("expected Parse, got {other:?}"),
    }
}
#[test]
fn reject_zero_dimension() {
    // A tensor with a zero-length dimension is malformed.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(1);
    b.push_u64(0);
    b.push_tensor_info("zero", &[0], 0, 0);
    b.pad_to_alignment(32);
    let tmp = write_temp_gguf(&b.finish());
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Parse { .. }
    ));
}
#[test]
fn reject_unaligned_relative_offset() {
    // Relative data offset 1 is not a multiple of the 32-byte alignment.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(1);
    b.push_u64(0);
    b.push_tensor_info("misaligned", &[1], 0, 1);
    b.pad_to_alignment(32);
    b.push_bytes(&[0u8; 64]);
    let tmp = write_temp_gguf(&b.finish());
    match parse_gguf(tmp.path()).unwrap_err() {
        AnamnesisError::Parse { reason } => {
            assert!(
                reason.contains("not a multiple of alignment"),
                "expected alignment error, got: {reason}"
            );
            // The offending tensor should be named in the message.
            assert!(reason.contains("misaligned"), "got: {reason}");
        }
        other => panic!("expected Parse, got {other:?}"),
    }
}
#[test]
fn accept_aligned_nonzero_relative_offset() {
    // tensor.b in the minimal fixture sits at relative offset 32, which is
    // aligned and therefore valid.
    let tmp = write_temp_gguf(&build_minimal_gguf());
    let parsed = parse_gguf(tmp.path()).unwrap();
    assert_eq!(parsed.len(), 2);
    assert_eq!(parsed.tensor_info()[1].name, "tensor.b");
}
#[test]
fn reject_array_depth_exceeded() {
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(0);
    b.push_u64(1);
    b.push_string("nested");
    // Value type 9 = array; each loop iteration nests another
    // single-element array, blowing past the depth cap.
    b.push_u32(9);
    for _ in 0..5 {
        b.push_u32(9);
        b.push_u64(1);
    }
    let tmp = write_temp_gguf(&b.finish());
    match parse_gguf(tmp.path()).unwrap_err() {
        AnamnesisError::Parse { reason } => {
            assert!(
                reason.contains("depth cap"),
                "expected depth-cap error, got: {reason}"
            );
        }
        other => panic!("expected Parse, got {other:?}"),
    }
}
#[test]
fn reject_bad_bool_byte() {
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(0);
    b.push_u64(1);
    b.push_string("weird");
    // Value type 7 = bool; payload byte 7 is neither 0 nor 1.
    b.push_u32(7);
    b.push_bytes(&[7]);
    let tmp = write_temp_gguf(&b.finish());
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Parse { .. }
    ));
}
#[test]
fn reject_zero_alignment() {
    // general.alignment = 0 would make every alignment check divide by
    // zero; the parser must refuse it up front.
    let mut b = GgufBuilder::new();
    b.push_bytes(b"GGUF");
    b.push_u32(3);
    b.push_u64(0);
    b.push_u64(1);
    b.push_kv_uint32("general.alignment", 0);
    let tmp = write_temp_gguf(&b.finish());
    assert!(matches!(
        parse_gguf(tmp.path()).unwrap_err(),
        AnamnesisError::Parse { .. }
    ));
}
#[test]
fn byte_size_table_spot_checks() {
    // (dtype, block_size, type_size, element count, expected byte size).
    let cases = [
        (GgufType::F32, 1, 4, 10, 40),
        (GgufType::Q4_0, 32, 18, 64, 36),
        (GgufType::Q4_K, 256, 144, 256, 144),
        (GgufType::Q8_0, 32, 34, 32, 34),
        (GgufType::IQ4_NL, 32, 18, 64, 36),
        (GgufType::IQ4_XS, 256, 136, 256, 136),
        (GgufType::IQ2_XXS, 256, 66, 256, 66),
        (GgufType::IQ2_XS, 256, 74, 256, 74),
        (GgufType::IQ2_S, 256, 82, 256, 82),
        (GgufType::IQ3_XXS, 256, 98, 256, 98),
        (GgufType::IQ3_S, 256, 110, 256, 110),
        (GgufType::IQ1_S, 256, 50, 256, 50),
        (GgufType::IQ1_M, 256, 56, 256, 56),
        (GgufType::TQ1_0, 256, 54, 256, 54),
        (GgufType::TQ2_0, 256, 66, 256, 66),
        (GgufType::MXFP4, 32, 17, 64, 34),
    ];
    for (ty, block, size, n, expected) in cases {
        assert_eq!(ty.block_size(), block, "block_size for {ty}");
        assert_eq!(ty.type_size(), Some(size), "type_size for {ty}");
        assert_eq!(
            ty.byte_size_for_n_elements(n).unwrap(),
            expected,
            "byte_size_for_n_elements for {ty}"
        );
    }
    // Only the per-block byte sizes are spot-checked for these two.
    assert_eq!(GgufType::Q8_K.type_size(), Some(292));
    assert_eq!(GgufType::Q6_K.type_size(), Some(210));
}
#[test]
fn is_quantized_classifies_correctly() {
    // Plain numeric formats are not quantized; block formats are.
    for ty in [GgufType::F32, GgufType::BF16, GgufType::I32] {
        assert!(!ty.is_quantized(), "{ty} should not be quantized");
    }
    for ty in [GgufType::Q4_0, GgufType::Q4_K, GgufType::IQ4_XS] {
        assert!(ty.is_quantized(), "{ty} should be quantized");
    }
}
#[test]
fn byte_size_rejects_non_multiple_of_block() {
    // Q4_0 packs 32 elements per block; 17 is not a whole number of
    // blocks and must be rejected.
    assert!(matches!(
        GgufType::Q4_0.byte_size_for_n_elements(17),
        Err(AnamnesisError::Parse { .. })
    ));
}
#[test]
fn align_up_behaves() {
    // (value, alignment, expected rounded-up result).
    for (value, align, expected) in [
        (0, 32, 0),
        (1, 32, 32),
        (32, 32, 32),
        (33, 32, 64),
        (100, 16, 112),
    ] {
        assert_eq!(
            align_up(value, align).unwrap(),
            expected,
            "align_up({value}, {align})"
        );
    }
}
#[test]
fn gguf_type_display_roundtrip() {
    // Display output should match the canonical dtype spelling.
    for (ty, text) in [
        (GgufType::F32, "F32"),
        (GgufType::Q4_K, "Q4_K"),
        (GgufType::IQ4_XS, "IQ4_XS"),
        (GgufType::BF16, "BF16"),
    ] {
        assert_eq!(ty.to_string(), text);
    }
}
}