use std::io::Read;
use std::path::Path;
use std::time::Instant;
/// Errors returned by the GGUF loading and validation helpers in this file.
#[derive(Debug, thiserror::Error)]
pub enum LoadError {
/// An underlying I/O operation failed. `#[from]` lets `?` convert a
/// `std::io::Error` into this variant automatically.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// The byte stream could not be parsed as GGUF (for example bad magic,
/// truncated data, invalid UTF-8 or an unknown metadata value type).
/// The payload is a human-readable description.
#[error("GGUF parse error: {0}")]
Parse(String),
/// `need` bytes would be required but only `budget` bytes are allowed.
// NOTE(review): not constructed by any code visible in this section.
#[error("memory budget exceeded: need {need} bytes, budget {budget} bytes")]
MemoryBudgetExceeded { need: u64, budget: u64 },
/// The file declares a GGUF version that is not accepted; the payload is that version.
// NOTE(review): not constructed by any code visible in this section;
// `validate_gguf_file` reports unsupported versions as warnings instead.
#[error("unsupported GGUF version: {0}")]
UnsupportedVersion(u32),
/// Validation rejected the file; the payload describes why.
// NOTE(review): not constructed by any code visible in this section.
#[error("validation failed: {0}")]
ValidationFailed(String),
}
/// Options controlling how a model file is loaded.
///
/// NOTE(review): no code visible in this section reads these fields, so the
/// per-field descriptions are inferred from the names; confirm against the
/// loader that consumes this struct.
#[derive(Debug, Clone)]
pub struct LoadConfig {
    /// Presumably an upper bound on memory use in bytes; `None` by default.
    pub max_memory_bytes: Option<usize>,
    /// Presumably enables checksum validation; `false` by default.
    pub validate_checksums: bool,
    /// Presumably tolerates quantisation ids missing from the known table; `true` by default.
    pub allow_unknown_quant_types: bool,
    /// Presumably the chunk size in bytes for streamed reads; 4 MiB by default.
    pub streaming_chunk_size: usize,
    /// Presumably turns an unsupported version into a hard error; `false` by default.
    pub strict_version: bool,
}

impl Default for LoadConfig {
    /// Builds the default configuration: no memory cap, no checksum
    /// validation, unknown quantisation types allowed, 4 MiB streaming
    /// chunks and non-strict version handling.
    fn default() -> Self {
        // 4 MiB expressed as a shift: 4 << 20 == 4 * 1024 * 1024.
        const DEFAULT_CHUNK_BYTES: usize = 4 << 20;

        LoadConfig {
            max_memory_bytes: None,
            validate_checksums: false,
            allow_unknown_quant_types: true,
            streaming_chunk_size: DEFAULT_CHUNK_BYTES,
            strict_version: false,
        }
    }
}
/// Counters describing a load operation. `Default` yields all-zero counters
/// and an empty warning list.
// NOTE(review): nothing in the visible part of this file populates these
// fields; the per-field descriptions below are inferred from the names and
// should be confirmed against the code that fills them in.
#[derive(Debug, Clone, Default)]
pub struct LoadStats {
/// Presumably the number of tensors that were loaded.
pub tensors_loaded: usize,
/// Presumably the total number of bytes loaded.
pub bytes_loaded: u64,
/// Presumably the number of tensors that were skipped.
pub skipped_tensors: usize,
/// Presumably the wall-clock load duration in milliseconds.
pub load_time_ms: u64,
/// Presumably the peak memory use in bytes observed during the load.
pub peak_memory_bytes: usize,
/// Presumably non-fatal validation messages collected during the load.
pub validation_warnings: Vec<String>,
}
/// Table of `(type id, display name)` pairs for the quantisation types this
/// file recognises. Consulted by [`TensorEntry::quant_name`] and
/// [`TensorEntry::is_known_quant`].
// NOTE(review): ids 0-35 appear to follow the ggml type numbering; 41 and 42
// look project-specific. Confirm against the upstream enum.
const KNOWN_QUANT_TYPES: &[(u32, &str)] = &[
    (0, "F32"),
    (1, "F16"),
    (2, "Q4_0"),
    (3, "Q4_1"),
    (6, "Q5_0"),
    (7, "Q5_1"),
    (8, "Q8_0"),
    (9, "Q8_1"),
    (10, "Q2_K"),
    (11, "Q3_K"),
    (12, "Q4_K"),
    (13, "Q5_K"),
    (14, "Q6_K"),
    (15, "Q8_K"),
    (30, "BF16"),
    (35, "TQ2_0"),
    (41, "Q1_0_g128"),
    (42, "TQ2_0_g128"),
];

/// One tensor-info record taken from a GGUF header.
#[derive(Debug, Clone)]
pub struct TensorEntry {
    /// Tensor name as stored in the file.
    pub name: String,
    /// Dimension sizes, in the order they appear in the file.
    pub shape: Vec<u64>,
    /// Raw quantisation / element type id from the file.
    pub quant_type_id: u32,
    /// Offset value read from the tensor-info record.
    // NOTE(review): presumably relative to the start of the tensor data
    // section, as in the GGUF spec; no visible code dereferences it.
    pub offset: u64,
    /// Size in bytes derived from `shape` and `quant_type_id`; it is computed,
    /// not read from the file.
    pub size_bytes: u64,
}

impl TensorEntry {
    /// Returns the number of elements, i.e. the product of all dimensions.
    ///
    /// An empty shape yields 1. Dimensions come from untrusted file data, so
    /// the product saturates at `u64::MAX` instead of overflowing (which would
    /// panic in debug builds and silently wrap in release builds). A zero
    /// dimension anywhere still yields 0.
    pub fn element_count(&self) -> u64 {
        self.shape
            .iter()
            .fold(1u64, |acc, &dim| acc.saturating_mul(dim))
    }

    /// Returns the display name of this tensor's quantisation type, or
    /// `"UNKNOWN"` when the id is not in [`KNOWN_QUANT_TYPES`].
    pub fn quant_name(&self) -> &'static str {
        Self::lookup_quant_name(self.quant_type_id).unwrap_or("UNKNOWN")
    }

    /// Returns `true` when this tensor's quantisation id is in [`KNOWN_QUANT_TYPES`].
    pub fn is_known_quant(&self) -> bool {
        Self::lookup_quant_name(self.quant_type_id).is_some()
    }

    /// Single table lookup shared by `quant_name` and `is_known_quant` so the
    /// two can never disagree.
    fn lookup_quant_name(id: u32) -> Option<&'static str> {
        KNOWN_QUANT_TYPES
            .iter()
            .find(|&&(known_id, _)| known_id == id)
            .map(|&(_, name)| name)
    }
}
/// Magic number expected in the first four bytes of a file when they are read
/// as a little-endian `u32` (the ASCII bytes `G`, `G`, `U`, `F` in file order).
const GGUF_MAGIC: u32 = 0x4655_4747;
/// GGUF versions treated as supported. `validate_gguf_file` only emits a
/// warning for versions outside this list; it does not reject them.
const SUPPORTED_VERSIONS: &[u32] = &[2, 3];
/// Reads a little-endian `u32` from `buf` at `*pos` and advances `*pos` by 4.
///
/// # Errors
/// Returns [`LoadError::Parse`] when fewer than 4 bytes are available at
/// `*pos`; `*pos` is left unchanged in that case.
fn read_u32_le(buf: &[u8], pos: &mut usize) -> Result<u32, LoadError> {
    const WIDTH: usize = 4;

    let start = *pos;
    // `checked_add` + `get` never panic, even for a `start` near `usize::MAX`,
    // whereas `start + 4 > buf.len()` could overflow.
    let bytes = start
        .checked_add(WIDTH)
        .and_then(|end| buf.get(start..end))
        .ok_or_else(|| {
            LoadError::Parse(format!(
                "unexpected EOF at offset {} reading u32",
                start
            ))
        })?;

    // `bytes` is exactly WIDTH long, so this copy cannot panic.
    let mut raw = [0u8; WIDTH];
    raw.copy_from_slice(bytes);
    *pos = start + WIDTH;
    Ok(u32::from_le_bytes(raw))
}
/// Reads a little-endian `u64` from `buf` at `*pos` and advances `*pos` by 8.
///
/// # Errors
/// Returns [`LoadError::Parse`] when fewer than 8 bytes are available at
/// `*pos`; `*pos` is left unchanged in that case.
fn read_u64_le(buf: &[u8], pos: &mut usize) -> Result<u64, LoadError> {
    const WIDTH: usize = 8;

    let start = *pos;
    // `checked_add` + `get` never panic, even for a `start` near `usize::MAX`,
    // whereas `start + 8 > buf.len()` could overflow.
    let bytes = start
        .checked_add(WIDTH)
        .and_then(|end| buf.get(start..end))
        .ok_or_else(|| {
            LoadError::Parse(format!(
                "unexpected EOF at offset {} reading u64",
                start
            ))
        })?;

    // `bytes` is exactly WIDTH long, so this copy cannot panic.
    let mut raw = [0u8; WIDTH];
    raw.copy_from_slice(bytes);
    *pos = start + WIDTH;
    Ok(u64::from_le_bytes(raw))
}
/// Reads a GGUF string: a little-endian `u64` byte length followed by that
/// many UTF-8 bytes. Advances `*pos` past the string on success.
///
/// # Errors
/// Returns [`LoadError::Parse`] when the length prefix is truncated, when the
/// declared length runs past the end of `buf` (including lengths too large to
/// address), or when the bytes are not valid UTF-8. On the latter two
/// failures `*pos` has already moved past the 8-byte length prefix.
fn read_gguf_string(buf: &[u8], pos: &mut usize) -> Result<String, LoadError> {
    let declared_len = read_u64_le(buf, pos)?;
    let start = *pos;

    // The length is attacker-controlled: convert and add with overflow checks
    // so an absurd value becomes a parse error instead of a panic or a
    // wrapped index.
    let bytes = usize::try_from(declared_len)
        .ok()
        .and_then(|len| start.checked_add(len))
        .and_then(|end| buf.get(start..end))
        .ok_or_else(|| {
            LoadError::Parse(format!(
                "string of length {declared_len} extends beyond buffer"
            ))
        })?;

    let text = std::str::from_utf8(bytes)
        .map_err(|e| LoadError::Parse(format!("invalid UTF-8 in string: {e}")))?;

    *pos = start + bytes.len();
    Ok(text.to_string())
}
/// Advances `*pos` past one GGUF metadata value of type `value_type` without
/// decoding it.
///
/// Type ids handled: 0/1 = u8/i8, 2/3 = u16/i16, 4/5/6 = u32/i32/f32,
/// 7 = bool, 8 = string, 9 = array, 10/11/12 = u64/i64/f64.
///
/// # Errors
/// Returns [`LoadError::Parse`] when the value is truncated, when the type id
/// is unknown, or when arrays are nested too deeply.
fn skip_metadata_value(buf: &[u8], pos: &mut usize, value_type: u32) -> Result<(), LoadError> {
    skip_metadata_value_at_depth(buf, pos, value_type, 0)
}

/// Recursive worker for `skip_metadata_value`; `depth` counts enclosing arrays.
fn skip_metadata_value_at_depth(
    buf: &[u8],
    pos: &mut usize,
    value_type: u32,
    depth: u32,
) -> Result<(), LoadError> {
    // Each nesting level costs only 12 input bytes but one stack frame, so an
    // unbounded recursion lets a small crafted file overflow the stack.
    const MAX_ARRAY_NESTING: u32 = 32;

    let (width, label): (usize, &str) = match value_type {
        0 | 1 => (1, "u8/i8"),
        2 | 3 => (2, "u16/i16"),
        4..=6 => (4, "u32/i32/f32"),
        // GGUF stores bool as a single byte, not as a 4-byte value.
        7 => (1, "bool"),
        8 => {
            read_gguf_string(buf, pos)?;
            return Ok(());
        }
        9 => {
            if depth >= MAX_ARRAY_NESTING {
                return Err(LoadError::Parse(format!(
                    "metadata arrays nested deeper than {} levels",
                    MAX_ARRAY_NESTING
                )));
            }
            let elem_type = read_u32_le(buf, pos)?;
            let count = read_u64_le(buf, pos)?;
            // Every element consumes at least one byte, so a bogus huge
            // `count` terminates with an EOF error rather than spinning.
            for _ in 0..count {
                skip_metadata_value_at_depth(buf, pos, elem_type, depth + 1)?;
            }
            return Ok(());
        }
        10..=12 => (8, "u64/i64/f64"),
        other => {
            return Err(LoadError::Parse(format!(
                "unknown metadata value type id: {other}"
            )));
        }
    };

    // Fixed-width scalar: bounds-check, then step over it.
    match (*pos).checked_add(width) {
        Some(end) if end <= buf.len() => {
            *pos = end;
            Ok(())
        }
        _ => Err(LoadError::Parse(format!("EOF in {label} value"))),
    }
}
/// Result of `parse_gguf_meta`: the subset of the GGUF header this file keeps.
struct ParsedGgufMeta {
/// GGUF version number read directly after the magic.
version: u32,
/// One entry per tensor-info record, in file order.
tensor_entries: Vec<TensorEntry>,
}
/// Parses the header portion of a GGUF file held in `buf`.
///
/// Reads the magic, version, tensor count and metadata key/value count, skips
/// every metadata pair without decoding it, then reads `tensor_count`
/// tensor-info records (name, dimensions, type id, offset). Each entry's
/// `size_bytes` is computed with `compute_tensor_size_bytes`.
///
/// The version is returned but not checked here.
///
/// # Errors
/// Returns [`LoadError::Parse`] when the magic does not match `GGUF_MAGIC` or
/// when any field is truncated or malformed.
fn parse_gguf_meta(buf: &[u8]) -> Result<ParsedGgufMeta, LoadError> {
    // Smallest possible tensor-info record: 8-byte name length (empty name)
    // + 4-byte n_dims + 4-byte type id + 8-byte offset.
    const MIN_TENSOR_INFO_BYTES: usize = 8 + 4 + 4 + 8;

    let mut pos = 0usize;

    let magic = read_u32_le(buf, &mut pos)?;
    if magic != GGUF_MAGIC {
        return Err(LoadError::Parse(format!(
            "invalid GGUF magic: 0x{:08X} (expected 0x{:08X})",
            magic, GGUF_MAGIC
        )));
    }
    let version = read_u32_le(buf, &mut pos)?;
    let tensor_count = read_u64_le(buf, &mut pos)?;
    let metadata_kv_count = read_u64_le(buf, &mut pos)?;

    // Metadata is skipped, not decoded: key string, value type, value.
    for _ in 0..metadata_kv_count {
        read_gguf_string(buf, &mut pos)?;
        let value_type = read_u32_le(buf, &mut pos)?;
        skip_metadata_value(buf, &mut pos, value_type)?;
    }

    // `tensor_count` is untrusted. Pre-allocating it verbatim lets a corrupt
    // header trigger a capacity-overflow panic or an allocation abort, so the
    // reservation is capped by how many records the remaining bytes can hold.
    let max_records = buf.len().saturating_sub(pos) / MIN_TENSOR_INFO_BYTES;
    let capacity = usize::try_from(tensor_count).map_or(max_records, |n| n.min(max_records));
    let mut tensor_entries = Vec::with_capacity(capacity);

    for _ in 0..tensor_count {
        let name = read_gguf_string(buf, &mut pos)?;
        let n_dims = read_u32_le(buf, &mut pos)?;
        // No up-front reservation from the untrusted `n_dims`: the vector
        // grows only as dimensions are actually read, and the first truncated
        // read aborts the collection.
        let shape = (0..n_dims)
            .map(|_| read_u64_le(buf, &mut pos))
            .collect::<Result<Vec<u64>, LoadError>>()?;
        let quant_type_id = read_u32_le(buf, &mut pos)?;
        let offset = read_u64_le(buf, &mut pos)?;
        let size_bytes = compute_tensor_size_bytes(&shape, quant_type_id);
        tensor_entries.push(TensorEntry {
            name,
            shape,
            quant_type_id,
            offset,
            size_bytes,
        });
    }

    Ok(ParsedGgufMeta {
        version,
        tensor_entries,
    })
}
/// Computes the storage size in bytes of a tensor with the given `shape` and
/// quantisation type.
///
/// Each known type maps to `(elements per block, bytes per block)`. The
/// element count (product of `shape`, 1 for an empty shape) is rounded up to
/// whole blocks and multiplied by the block byte size. Unknown type ids are
/// treated as one byte per element.
///
/// `shape` comes from untrusted file data. Any zero dimension yields 0; if the
/// element count or the final size does not fit in `u64`, the result is
/// clamped to `u64::MAX` instead of overflowing (a panic in debug builds, a
/// silent wrap to a small value in release builds).
fn compute_tensor_size_bytes(shape: &[u64], quant_type_id: u32) -> u64 {
    let (block_size, block_bytes): (u64, u64) = match quant_type_id {
        0 => (1, 4),
        1 => (1, 2),
        2 => (32, 18),
        3 => (32, 20),
        6 => (32, 22),
        7 => (32, 24),
        8 => (32, 34),
        9 => (32, 40),
        10 => (256, 84),
        11 => (256, 110),
        12 => (256, 144),
        13 => (256, 176),
        14 => (256, 210),
        15 => (256, 292),
        30 => (1, 2),
        35 => (256, 66),
        41 => (128, 18),
        42 => (128, 34),
        _ => (1, 1),
    };

    // A zero dimension makes the tensor empty no matter how large the other
    // dimensions are, so decide that before the checked multiplication.
    if shape.contains(&0) {
        return 0;
    }

    shape
        .iter()
        .try_fold(1u64, |acc, &dim| acc.checked_mul(dim))
        .and_then(|elements| elements.div_ceil(block_size).checked_mul(block_bytes))
        .unwrap_or(u64::MAX)
}
/// Parses the GGUF header of the file at `path` and reports non-fatal findings.
///
/// Returns one human-readable warning per finding, in this order: an
/// unsupported version, a file with zero tensors, then per tensor an unknown
/// quantisation type id and/or an empty shape. An empty vector means nothing
/// was flagged.
///
/// # Errors
/// Returns [`LoadError::Io`] when the file cannot be read and
/// [`LoadError::Parse`] when the header cannot be parsed.
pub fn validate_gguf_file(path: &Path) -> Result<Vec<String>, LoadError> {
    // NOTE(review): the whole file is read into memory although only the
    // header is inspected; large model files make this expensive.
    let contents = std::fs::read(path)?;
    let meta = parse_gguf_meta(&contents)?;

    let mut warnings = Vec::new();

    if !SUPPORTED_VERSIONS.contains(&meta.version) {
        warnings.push(format!(
            "GGUF version {} is not in the officially supported set {:?}",
            meta.version, SUPPORTED_VERSIONS
        ));
    }
    if meta.tensor_entries.is_empty() {
        warnings.push("file contains zero tensors".to_string());
    }

    for entry in &meta.tensor_entries {
        if !entry.is_known_quant() {
            warnings.push(format!(
                "tensor '{}' has unknown quantisation type id {}",
                entry.name, entry.quant_type_id
            ));
        }
        if entry.shape.is_empty() {
            warnings.push(format!(
                "tensor '{}' has zero-dimensional shape",
                entry.name
            ));
        }
    }

    Ok(warnings)
}
/// Reads the file at `path` and returns the tensor-info entries from its GGUF
/// header, in file order.
///
/// # Errors
/// Returns [`LoadError::Io`] when the file cannot be opened or read and
/// [`LoadError::Parse`] when the header cannot be parsed.
pub fn load_tensor_metadata(path: &Path) -> Result<Vec<TensorEntry>, LoadError> {
    // NOTE(review): this timestamp is never read; it is kept only so the
    // function behaves exactly as before.
    let _t0 = Instant::now();

    // The entire file is read into memory before the header is parsed.
    let mut contents = Vec::new();
    std::fs::File::open(path)?.read_to_end(&mut contents)?;

    parse_gguf_meta(&contents).map(|meta| meta.tensor_entries)
}
/// Estimates the bytes needed to hold every tensor in the file at `path` by
/// summing the `size_bytes` of each entry from `load_tensor_metadata`.
///
/// Tensor sizes are derived from untrusted header data, so the sum saturates
/// at `u64::MAX` rather than overflowing. A saturated result compares as
/// "larger than any budget", which is the safe answer.
///
/// # Errors
/// Propagates any [`LoadError`] from `load_tensor_metadata`.
pub fn estimate_memory_bytes(path: &Path) -> Result<u64, LoadError> {
    let entries = load_tensor_metadata(path)?;
    let total = entries
        .iter()
        .fold(0u64, |sum, entry| sum.saturating_add(entry.size_bytes));
    Ok(total)
}
/// Reports whether the estimated tensor memory for the file at `path` is at
/// most `budget_bytes` (the comparison is inclusive).
///
/// # Errors
/// Propagates any [`LoadError`] from `estimate_memory_bytes`.
pub fn fits_in_budget(path: &Path, budget_bytes: u64) -> Result<bool, LoadError> {
    estimate_memory_bytes(path).map(|required| required <= budget_bytes)
}
/// Iterator that hands out an owned byte buffer in consecutive chunks of at
/// most `chunk_size` bytes.
pub struct TensorChunkIter {
    /// The full buffer being chunked.
    data: Vec<u8>,
    /// Maximum number of bytes per yielded chunk; always greater than zero.
    chunk_size: usize,
    /// Index of the first byte not yet yielded.
    pos: usize,
}

impl TensorChunkIter {
    /// Creates an iterator over `data` that yields chunks of up to
    /// `chunk_size` bytes.
    ///
    /// # Panics
    /// Panics when `chunk_size` is zero.
    pub fn new(data: Vec<u8>, chunk_size: usize) -> Self {
        assert!(chunk_size > 0, "chunk_size must be > 0");
        TensorChunkIter {
            pos: 0,
            chunk_size,
            data,
        }
    }

    /// Returns how many chunks the whole buffer splits into, regardless of
    /// how many have already been consumed. An empty buffer has zero chunks.
    pub fn total_chunks(&self) -> usize {
        match self.data.len() {
            0 => 0,
            len => len.div_ceil(self.chunk_size),
        }
    }

    /// Returns the number of bytes that have not been yielded yet.
    pub fn bytes_remaining(&self) -> usize {
        let total = self.data.len();
        total.saturating_sub(self.pos)
    }
}

impl Iterator for TensorChunkIter {
    type Item = Vec<u8>;

    /// Yields a copy of the next chunk, or `None` once the buffer is exhausted.
    /// The final chunk may be shorter than `chunk_size`.
    fn next(&mut self) -> Option<Self::Item> {
        // `get` is `None` past the end; an empty tail also means "finished".
        let rest = self.data.get(self.pos..).filter(|tail| !tail.is_empty())?;
        let take = rest.len().min(self.chunk_size);
        let chunk = rest[..take].to_vec();
        self.pos += take;
        Some(chunk)
    }
}