use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use scivex_core::{Float, Tensor};
use crate::error::{NnError, Result};
/// Four-byte magic that `save_gguf` writes first and `load_gguf` requires at the start of a file.
const GGUF_MAGIC: &[u8; 4] = b"GGUF";
/// Format version written by `save_gguf` (`load_gguf` accepts versions 2 and 3).
const GGUF_VERSION: u32 = 3;
/// Byte boundary used by `align_offset` for the start of the data section and for each tensor.
const ALIGNMENT: usize = 32;
// Type tags written in front of a metadata value; see `read_gguf_value` / `write_gguf_value`.
const GGUF_TYPE_UINT32: u32 = 4;
const GGUF_TYPE_INT32: u32 = 5;
const GGUF_TYPE_FLOAT32: u32 = 6;
const GGUF_TYPE_STRING: u32 = 8;
const GGUF_TYPE_UINT64: u32 = 10;
// Tensor element type ids stored in each tensor-info record; `ggml_type_size` maps
// them to element widths of 4 (F32), 2 (F16) and 8 (F64) bytes.
const GGML_TYPE_F32: u32 = 0;
const GGML_TYPE_F16: u32 = 1;
const GGML_TYPE_F64: u32 = 28;
/// Wraps `msg` in an `NnError::SerializeError`, copying it into an owned `String`.
fn ser_err(msg: &str) -> NnError {
    let owned = String::from(msg);
    NnError::SerializeError(owned)
}
/// Builds the generic error reported whenever a low-level read or write fails.
///
/// The function takes no arguments, so the originating `std::io::Error` is not
/// carried along; callers always get the same fixed message.
fn io_err() -> NnError {
    const MESSAGE: &str = "I/O error during GGUF persistence";
    NnError::SerializeError(String::from(MESSAGE))
}
/// A metadata value that can be stored under a key in a GGUF file.
///
/// Only the kinds listed here are handled: `read_gguf_value` returns a
/// serialization error for any other on-disk type tag.
#[cfg_attr(
feature = "serde-support",
derive(serde::Serialize, serde::Deserialize)
)]
#[derive(Debug, Clone, PartialEq)]
pub enum GgufValue {
/// Unsigned 32-bit integer (type tag `GGUF_TYPE_UINT32`).
Uint32(u32),
/// Signed 32-bit integer (type tag `GGUF_TYPE_INT32`).
Int32(i32),
/// 32-bit float (type tag `GGUF_TYPE_FLOAT32`).
Float32(f32),
/// UTF-8 string (type tag `GGUF_TYPE_STRING`), written as a `u64` byte length followed by the bytes.
String(String),
/// Unsigned 64-bit integer (type tag `GGUF_TYPE_UINT64`).
Uint64(u64),
}
/// In-memory contents of a GGUF file: its metadata entries and its tensors.
///
/// Both collections are ordered lists of `(name, value)` pairs. `save_gguf`
/// writes them in this order and `load_gguf` returns them in file order.
#[cfg_attr(
feature = "serde-support",
derive(serde::Serialize, serde::Deserialize)
)]
#[derive(Debug, Clone)]
pub struct GgufFile<T: Float> {
/// Key/value metadata entries.
pub metadata: Vec<(String, GgufValue)>,
/// Named tensors.
pub tensors: Vec<(String, Tensor<T>)>,
}
/// Reads a little-endian `u32` from `r`.
///
/// # Errors
/// Returns the generic I/O error if four bytes cannot be read.
fn read_u32(r: &mut impl Read) -> Result<u32> {
    let mut raw = [0_u8; std::mem::size_of::<u32>()];
    match r.read_exact(&mut raw) {
        Ok(()) => Ok(u32::from_le_bytes(raw)),
        Err(_) => Err(io_err()),
    }
}
/// Reads a little-endian `u64` from `r`.
///
/// # Errors
/// Returns the generic I/O error if eight bytes cannot be read.
fn read_u64(r: &mut impl Read) -> Result<u64> {
    let mut raw = [0_u8; std::mem::size_of::<u64>()];
    match r.read_exact(&mut raw) {
        Ok(()) => Ok(u64::from_le_bytes(raw)),
        Err(_) => Err(io_err()),
    }
}
/// Reads a little-endian `i32` from `r`.
///
/// # Errors
/// Returns the generic I/O error if four bytes cannot be read.
fn read_i32(r: &mut impl Read) -> Result<i32> {
    let mut raw = [0_u8; std::mem::size_of::<i32>()];
    match r.read_exact(&mut raw) {
        Ok(()) => Ok(i32::from_le_bytes(raw)),
        Err(_) => Err(io_err()),
    }
}
/// Reads a little-endian IEEE-754 `f32` from `r`.
///
/// # Errors
/// Returns the generic I/O error if four bytes cannot be read.
fn read_f32(r: &mut impl Read) -> Result<f32> {
    let mut raw = [0_u8; std::mem::size_of::<f32>()];
    match r.read_exact(&mut raw) {
        Ok(()) => Ok(f32::from_le_bytes(raw)),
        Err(_) => Err(io_err()),
    }
}
/// Reads a GGUF string: a little-endian `u64` byte length followed by that
/// many bytes of UTF-8.
///
/// The declared length comes straight from the file, so it is never used to
/// size an allocation directly. The bytes are pulled through a bounded
/// `take` adapter instead, which means a corrupt or hostile length prefix
/// produces an error rather than a multi-gigabyte allocation or a
/// capacity-overflow panic.
///
/// # Errors
/// Returns the generic I/O error if the length prefix cannot be read or the
/// input ends before `len` bytes were available, and a serialization error
/// if the bytes are not valid UTF-8.
fn read_gguf_string(r: &mut impl Read) -> Result<String> {
    // Upper bound on the up-front reservation; longer strings simply grow the buffer.
    const MAX_PREALLOC_BYTES: u64 = 64 * 1024;

    let len = read_u64(r)?;
    // The cast cannot truncate: the value is at most MAX_PREALLOC_BYTES.
    let mut buf = Vec::with_capacity(len.min(MAX_PREALLOC_BYTES) as usize);
    let got = r
        .by_ref()
        .take(len)
        .read_to_end(&mut buf)
        .map_err(|_| io_err())?;
    if got as u64 != len {
        // Short read: the input ended before the declared length was reached.
        return Err(io_err());
    }
    String::from_utf8(buf).map_err(|_| ser_err("invalid UTF-8 in GGUF string"))
}
/// Reads one metadata value: a `u32` type tag followed by the payload that
/// tag announces.
///
/// # Errors
/// Propagates read failures and returns a serialization error for any type
/// tag other than the five `GGUF_TYPE_*` constants handled here.
fn read_gguf_value(r: &mut impl Read) -> Result<GgufValue> {
    let type_id = read_u32(r)?;
    let value = match type_id {
        GGUF_TYPE_UINT32 => GgufValue::Uint32(read_u32(r)?),
        GGUF_TYPE_INT32 => GgufValue::Int32(read_i32(r)?),
        GGUF_TYPE_FLOAT32 => GgufValue::Float32(read_f32(r)?),
        GGUF_TYPE_STRING => GgufValue::String(read_gguf_string(r)?),
        GGUF_TYPE_UINT64 => GgufValue::Uint64(read_u64(r)?),
        _ => {
            let message = format!("unsupported GGUF value type: {type_id}");
            return Err(ser_err(&message));
        }
    };
    Ok(value)
}
/// Writes `v` to `w` as four little-endian bytes.
///
/// # Errors
/// Returns the generic I/O error if the write fails.
fn write_u32(w: &mut impl Write, v: u32) -> Result<()> {
    let bytes = v.to_le_bytes();
    if w.write_all(&bytes).is_err() {
        return Err(io_err());
    }
    Ok(())
}
/// Writes `v` to `w` as eight little-endian bytes.
///
/// # Errors
/// Returns the generic I/O error if the write fails.
fn write_u64(w: &mut impl Write, v: u64) -> Result<()> {
    let bytes = v.to_le_bytes();
    if w.write_all(&bytes).is_err() {
        return Err(io_err());
    }
    Ok(())
}
/// Writes `v` to `w` as four little-endian bytes.
///
/// # Errors
/// Returns the generic I/O error if the write fails.
fn write_i32(w: &mut impl Write, v: i32) -> Result<()> {
    let bytes = v.to_le_bytes();
    if w.write_all(&bytes).is_err() {
        return Err(io_err());
    }
    Ok(())
}
/// Writes `v` to `w` as four little-endian IEEE-754 bytes.
///
/// # Errors
/// Returns the generic I/O error if the write fails.
fn write_f32(w: &mut impl Write, v: f32) -> Result<()> {
    let bytes = v.to_le_bytes();
    if w.write_all(&bytes).is_err() {
        return Err(io_err());
    }
    Ok(())
}
/// Writes `s` as a GGUF string: its byte length as a little-endian `u64`,
/// then the raw UTF-8 bytes (no terminator).
///
/// # Errors
/// Returns the generic I/O error if either write fails.
fn write_gguf_string(w: &mut impl Write, s: &str) -> Result<()> {
    let payload = s.as_bytes();
    write_u64(w, payload.len() as u64)?;
    match w.write_all(payload) {
        Ok(()) => Ok(()),
        Err(_) => Err(io_err()),
    }
}
/// Writes one metadata value: the `u32` type tag for its variant, followed by
/// the payload in little-endian form (strings via `write_gguf_string`).
///
/// # Errors
/// Propagates any write failure from the underlying helpers.
fn write_gguf_value(w: &mut impl Write, val: &GgufValue) -> Result<()> {
    let tag = match val {
        GgufValue::Uint32(_) => GGUF_TYPE_UINT32,
        GgufValue::Int32(_) => GGUF_TYPE_INT32,
        GgufValue::Float32(_) => GGUF_TYPE_FLOAT32,
        GgufValue::String(_) => GGUF_TYPE_STRING,
        GgufValue::Uint64(_) => GGUF_TYPE_UINT64,
    };
    write_u32(w, tag)?;

    match val {
        GgufValue::Uint32(n) => write_u32(w, *n),
        GgufValue::Int32(n) => write_i32(w, *n),
        GgufValue::Float32(x) => write_f32(w, *x),
        GgufValue::String(text) => write_gguf_string(w, text),
        GgufValue::Uint64(n) => write_u64(w, *n),
    }
}
/// Picks the tensor element type id used when saving tensors of `T`.
///
/// A `T` that is four bytes wide is stored as `GGML_TYPE_F32`; every other
/// width is stored as `GGML_TYPE_F64`.
fn ggml_type_for<T: Float>() -> u32 {
    if std::mem::size_of::<T>() == 4 {
        GGML_TYPE_F32
    } else {
        GGML_TYPE_F64
    }
}
/// Returns the size in bytes of one element of the given tensor type id.
///
/// # Errors
/// Returns a serialization error for any id other than `GGML_TYPE_F16`,
/// `GGML_TYPE_F32` or `GGML_TYPE_F64`.
fn ggml_type_size(type_id: u32) -> Result<usize> {
    let width = match type_id {
        GGML_TYPE_F16 => 2,
        GGML_TYPE_F32 => 4,
        GGML_TYPE_F64 => 8,
        _ => {
            let message = format!("unsupported GGML tensor type: {type_id}");
            return Err(ser_err(&message));
        }
    };
    Ok(width)
}
/// Rounds `offset` up to the next multiple of `ALIGNMENT`; values that are
/// already aligned are returned unchanged.
fn align_offset(offset: usize) -> usize {
    match offset % ALIGNMENT {
        0 => offset,
        remainder => offset + (ALIGNMENT - remainder),
    }
}
/// Converts the raw bit pattern of an IEEE-754 half-precision float to `f32`.
///
/// All four classes of input are handled: signed zero, subnormals (which are
/// renormalised, since every half subnormal is a normal `f32`), infinities
/// and NaNs (the payload is shifted into the upper mantissa bits), and
/// ordinary normal numbers.
fn f16_to_f32(bits: u16) -> f32 {
    let sign_bit = u32::from(bits >> 15) << 31;
    let exp = u32::from((bits >> 10) & 0x1F);
    let frac = u32::from(bits & 0x3FF);

    let magnitude = match (exp, frac) {
        // Signed zero.
        (0, 0) => 0,
        // Subnormal: shift the fraction left until its top set bit reaches
        // bit 10 (the implicit leading one), lowering the exponent by one per
        // shift. `frac` is non-zero and below 2^10, so it has between 22 and
        // 31 leading zeros as a u32 and `shift` lies in 1..=10.
        (0, _) => {
            let shift = frac.leading_zeros() - 21;
            let normalized = (frac << shift) & 0x3FF;
            // Half subnormals have exponent -14; re-biased for f32 that is
            // -14 + 127 = 113, minus one per shift.
            let biased_exp = 113 - shift;
            (biased_exp << 23) | (normalized << 13)
        }
        // Infinity (frac == 0) or NaN (frac != 0).
        (31, _) => (0xFF << 23) | (frac << 13),
        // Normal number: re-bias the exponent from 15 to 127 (+112).
        _ => ((exp + 112) << 23) | (frac << 13),
    };

    f32::from_bits(sign_bit | magnitude)
}
/// One tensor-info record as parsed from the file header by `read_tensor_info`.
struct TensorInfo {
/// Tensor name.
name: String,
/// Dimension sizes, in the order they appear in the file.
dims: Vec<usize>,
/// Element type id; `ggml_type_size` decides whether it is supported.
type_id: u32,
/// Byte offset of this tensor's data within the data slice handed to `read_tensor_data`.
offset: u64,
}
fn read_tensor_info(r: &mut impl Read) -> Result<TensorInfo> {
let name = read_gguf_string(r)?;
let n_dims = read_u32(r)? as usize;
let mut dims = Vec::with_capacity(n_dims);
for _ in 0..n_dims {
dims.push(read_u64(r)? as usize);
}
let type_id = read_u32(r)?;
let offset = read_u64(r)?;
Ok(TensorInfo {
name,
dims,
type_id,
offset,
})
}
/// Decodes the tensor described by `info` out of `data` and converts every
/// element to `T`.
///
/// `data` is the byte slice that `info.offset` is relative to. Elements are
/// little-endian F16, F32 or F64 according to `info.type_id`; each one is
/// widened to `f64` and passed through `T::from_f64`. A record with no
/// dimensions is treated as a single element and returned with shape `[1]`.
///
/// All size arithmetic is checked, because the dimensions and the offset come
/// from the file: overflowing element counts or byte ranges are reported as
/// errors instead of wrapping around (release) or panicking (debug).
///
/// # Errors
/// Returns a serialization error if the element type is unsupported, the
/// tensor size overflows `usize`, the byte range lies outside `data`, or
/// `Tensor::from_vec` rejects the values/shape pair.
fn read_tensor_data<T: Float>(data: &[u8], info: &TensorInfo) -> Result<Tensor<T>> {
    const OUT_OF_BOUNDS: &str = "tensor data out of bounds in GGUF";

    let elem_size = ggml_type_size(info.type_id)?;

    // The product of an empty dimension list is 1, which is the scalar case.
    let numel = info
        .dims
        .iter()
        .try_fold(1usize, |acc, &dim| acc.checked_mul(dim))
        .ok_or_else(|| ser_err("tensor size overflows in GGUF"))?;
    let byte_len = numel
        .checked_mul(elem_size)
        .ok_or_else(|| ser_err("tensor size overflows in GGUF"))?;

    // Fully qualified so no extra `use` is needed regardless of edition.
    let start: usize =
        std::convert::TryFrom::try_from(info.offset).map_err(|_| ser_err(OUT_OF_BOUNDS))?;
    let end = start
        .checked_add(byte_len)
        .ok_or_else(|| ser_err(OUT_OF_BOUNDS))?;
    let slice = data.get(start..end).ok_or_else(|| ser_err(OUT_OF_BOUNDS))?;

    // `slice.len()` is exactly `numel * elem_size`, so `chunks_exact` yields
    // `numel` chunks with no remainder.
    let values: Vec<T> = match info.type_id {
        GGML_TYPE_F16 => slice
            .chunks_exact(2)
            .map(|chunk| {
                let bits = u16::from_le_bytes([chunk[0], chunk[1]]);
                T::from_f64(f64::from(f16_to_f32(bits)))
            })
            .collect(),
        GGML_TYPE_F32 => slice
            .chunks_exact(4)
            .map(|chunk| {
                let mut raw = [0u8; 4];
                raw.copy_from_slice(chunk);
                T::from_f64(f64::from(f32::from_le_bytes(raw)))
            })
            .collect(),
        GGML_TYPE_F64 => slice
            .chunks_exact(8)
            .map(|chunk| {
                let mut raw = [0u8; 8];
                raw.copy_from_slice(chunk);
                T::from_f64(f64::from_le_bytes(raw))
            })
            .collect(),
        _ => {
            return Err(ser_err(&format!(
                "unsupported tensor type {}",
                info.type_id
            )));
        }
    };

    let shape = if info.dims.is_empty() {
        vec![1]
    } else {
        info.dims.clone()
    };
    Tensor::from_vec(values, shape).map_err(|e| ser_err(&format!("tensor creation: {e}")))
}
/// Number of bytes a GGUF string occupies on disk: an 8-byte length prefix
/// plus the UTF-8 bytes of `s`.
fn gguf_string_wire_size(s: &str) -> usize {
    const LENGTH_PREFIX_BYTES: usize = std::mem::size_of::<u64>();
    LENGTH_PREFIX_BYTES + s.len()
}
/// Number of bytes a metadata value occupies on disk: a 4-byte type tag plus
/// the payload for its variant.
fn gguf_value_wire_size(val: &GgufValue) -> usize {
    const TYPE_TAG_BYTES: usize = 4;
    let payload_bytes = match val {
        GgufValue::String(text) => gguf_string_wire_size(text),
        GgufValue::Uint64(_) => 8,
        GgufValue::Uint32(_) | GgufValue::Int32(_) | GgufValue::Float32(_) => 4,
    };
    TYPE_TAG_BYTES + payload_bytes
}
/// Number of bytes one metadata entry occupies on disk: the key string
/// followed by the tagged value.
fn gguf_kv_wire_size(key: &str, val: &GgufValue) -> usize {
    let key_bytes = gguf_string_wire_size(key);
    let value_bytes = gguf_value_wire_size(val);
    key_bytes + value_bytes
}
/// Number of bytes one tensor-info record occupies on disk: the name string,
/// a `u32` dimension count, one `u64` per dimension, a `u32` type id and a
/// `u64` data offset.
fn tensor_info_wire_size(name: &str, n_dims: usize) -> usize {
    let name_bytes = gguf_string_wire_size(name);
    let dim_count_bytes = 4;
    let dims_bytes = n_dims * 8;
    let type_id_bytes = 4;
    let offset_bytes = 8;
    name_bytes + dim_count_bytes + dims_bytes + type_id_bytes + offset_bytes
}
/// Loads a GGUF file from `path`, returning its metadata and tensors with all
/// tensor elements converted to `T`.
///
/// The file must start with the `GGUF` magic and declare version 2 or 3.
/// Metadata values are limited to the kinds in [`GgufValue`], and tensor data
/// to the element types accepted by `ggml_type_size`.
///
/// The entry counts in the header are untrusted, so they only bound the read
/// loops; the up-front `Vec` reservations are capped. A corrupt count thus
/// ends in a read error rather than an enormous allocation.
///
/// NOTE(review): the data-section start is derived from the fixed `ALIGNMENT`
/// constant; no metadata key is consulted for a different alignment.
///
/// # Errors
/// Returns `NnError::SerializeError` if the file cannot be opened or read,
/// the magic or version is wrong, a metadata or tensor type is unsupported,
/// or a tensor's byte range falls outside the data section.
pub fn load_gguf<T: Float>(path: &str) -> Result<GgufFile<T>> {
    // Cap on how many entries are reserved before any of them has been read.
    const MAX_PREALLOC_ENTRIES: u64 = 1024;

    let f = File::open(path).map_err(|_| ser_err("cannot open GGUF file"))?;
    let mut r = BufReader::new(f);

    let mut magic = [0u8; 4];
    r.read_exact(&mut magic).map_err(|_| io_err())?;
    if &magic != GGUF_MAGIC {
        return Err(ser_err("not a valid GGUF file (bad magic)"));
    }
    let version = read_u32(&mut r)?;
    if !(2..=3).contains(&version) {
        return Err(ser_err(&format!("unsupported GGUF version: {version}")));
    }

    let tensor_count = read_u64(&mut r)?;
    let kv_count = read_u64(&mut r)?;

    // The casts cannot truncate: both values are at most MAX_PREALLOC_ENTRIES.
    let mut metadata = Vec::with_capacity(kv_count.min(MAX_PREALLOC_ENTRIES) as usize);
    for _ in 0..kv_count {
        let key = read_gguf_string(&mut r)?;
        let val = read_gguf_value(&mut r)?;
        metadata.push((key, val));
    }

    let mut tensor_infos = Vec::with_capacity(tensor_count.min(MAX_PREALLOC_ENTRIES) as usize);
    for _ in 0..tensor_count {
        tensor_infos.push(read_tensor_info(&mut r)?);
    }

    // Everything after the header: alignment padding followed by tensor data.
    let mut all_data = Vec::new();
    r.read_to_end(&mut all_data).map_err(|_| io_err())?;

    // Recompute how many header bytes were consumed so the padding in front
    // of the data section can be skipped.
    let mut header_bytes: usize = 4 + 4 + 8 + 8;
    for (k, v) in &metadata {
        header_bytes += gguf_kv_wire_size(k, v);
    }
    for info in &tensor_infos {
        header_bytes += tensor_info_wire_size(&info.name, info.dims.len());
    }
    let padding = align_offset(header_bytes) - header_bytes;

    // NOTE(review): when fewer bytes than the expected padding remain, the
    // remaining bytes are used unshifted rather than rejected. This leniency
    // is kept as-is; confirm whether it is intentional.
    let data_section: &[u8] = all_data.get(padding..).unwrap_or(&all_data[..]);

    let mut tensors = Vec::with_capacity(tensor_infos.len());
    for info in &tensor_infos {
        let tensor = read_tensor_data(data_section, info)?;
        tensors.push((info.name.clone(), tensor));
    }
    Ok(GgufFile { metadata, tensors })
}
/// Writes `file` to `path` in GGUF format, creating or truncating the file.
///
/// Layout, in order: magic, version, tensor count, metadata count, the
/// metadata entries, one tensor-info record per tensor, zero padding up to
/// the next `ALIGNMENT` boundary, then each tensor's elements. Every tensor
/// starts at an `ALIGNMENT`-aligned offset relative to the start of the data
/// section. Elements are written as little-endian `f32` when `T` is four
/// bytes wide and as `f64` otherwise (see `ggml_type_for`).
///
/// # Errors
/// Returns `NnError::SerializeError` if the file cannot be created or any
/// write fails.
pub fn save_gguf<T: Float>(path: &str, file: &GgufFile<T>) -> Result<()> {
    let handle = File::create(path).map_err(|_| ser_err("cannot create GGUF file"))?;
    let mut out = BufWriter::new(handle);

    // Fixed header.
    out.write_all(GGUF_MAGIC).map_err(|_| io_err())?;
    write_u32(&mut out, GGUF_VERSION)?;
    write_u64(&mut out, file.tensors.len() as u64)?;
    write_u64(&mut out, file.metadata.len() as u64)?;

    // Metadata entries.
    for (key, val) in &file.metadata {
        write_gguf_string(&mut out, key)?;
        write_gguf_value(&mut out, val)?;
    }

    let ggml_type = ggml_type_for::<T>();
    let elem_size = ggml_type_size(ggml_type)?;

    // Total header size, needed to know how much padding precedes the data.
    let fixed_bytes: usize = 4 + 4 + 8 + 8;
    let kv_bytes: usize = file
        .metadata
        .iter()
        .map(|(k, v)| gguf_kv_wire_size(k, v))
        .sum();
    let info_bytes: usize = file
        .tensors
        .iter()
        .map(|(name, tensor)| tensor_info_wire_size(name, tensor.ndim()))
        .sum();
    let header_size = fixed_bytes + kv_bytes + info_bytes;
    let data_section_start = align_offset(header_size);

    // Tensor-info records; remember the offset assigned to each tensor.
    let mut tensor_offsets: Vec<u64> = Vec::with_capacity(file.tensors.len());
    let mut next_offset: u64 = 0;
    for (name, tensor) in &file.tensors {
        write_gguf_string(&mut out, name)?;
        write_u32(&mut out, tensor.ndim() as u32)?;
        for &dim in tensor.shape() {
            write_u64(&mut out, dim as u64)?;
        }
        write_u32(&mut out, ggml_type)?;
        write_u64(&mut out, next_offset)?;
        tensor_offsets.push(next_offset);

        let numel: usize = tensor.shape().iter().product();
        next_offset = align_offset(next_offset as usize + numel * elem_size) as u64;
    }

    // Pad the header out to the aligned start of the data section.
    let header_padding = data_section_start - header_size;
    if header_padding > 0 {
        out.write_all(&vec![0u8; header_padding])
            .map_err(|_| io_err())?;
    }

    // Tensor data. `cursor` is the position, relative to the data section,
    // just past the previous tensor's bytes (0 before the first tensor).
    let mut cursor: usize = 0;
    for ((_name, tensor), &assigned) in file.tensors.iter().zip(&tensor_offsets) {
        let target = assigned as usize;
        if target > cursor {
            out.write_all(&vec![0u8; target - cursor])
                .map_err(|_| io_err())?;
        }

        let values = tensor.as_slice();
        match elem_size {
            4 => {
                for &v in values {
                    #[allow(clippy::cast_possible_truncation)]
                    let narrowed = v.to_f64() as f32;
                    out.write_all(&narrowed.to_le_bytes())
                        .map_err(|_| io_err())?;
                }
            }
            8 => {
                for &v in values {
                    let wide = v.to_f64();
                    out.write_all(&wide.to_le_bytes()).map_err(|_| io_err())?;
                }
            }
            _ => return Err(ser_err("unsupported element size")),
        }

        let numel: usize = tensor.shape().iter().product();
        cursor = target + numel * elem_size;
    }

    out.flush().map_err(|_| io_err())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a per-process path inside the system temp directory for the
    /// test called `name`, using `PathBuf::join` so the separator is right on
    /// every platform.
    fn temp_path(name: &str) -> String {
        let file_name = format!("scivex_gguf_test_{name}_{}.gguf", std::process::id());
        std::env::temp_dir()
            .join(file_name)
            .to_string_lossy()
            .into_owned()
    }

    /// Owns a temp-file path and deletes the file on drop, so a failing
    /// assertion does not leave files behind.
    struct TempFile(String);

    impl TempFile {
        fn new(name: &str) -> Self {
            TempFile(temp_path(name))
        }

        fn path(&self) -> &str {
            &self.0
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: the file may not exist if saving failed.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    #[test]
    fn test_gguf_single_tensor() {
        let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let t = Tensor::from_vec(data.clone(), vec![2, 3]).unwrap();
        let file = GgufFile {
            metadata: vec![],
            tensors: vec![("weight".to_string(), t)],
        };
        let tmp = TempFile::new("single");
        save_gguf(tmp.path(), &file).unwrap();
        let loaded: GgufFile<f64> = load_gguf(tmp.path()).unwrap();
        assert_eq!(loaded.tensors.len(), 1);
        assert_eq!(loaded.tensors[0].0, "weight");
        assert_eq!(loaded.tensors[0].1.shape(), &[2, 3]);
        let loaded_data = loaded.tensors[0].1.as_slice();
        assert_eq!(loaded_data.len(), data.len());
        for (a, b) in data.iter().zip(loaded_data.iter()) {
            assert!((*a - *b).abs() < 1e-12);
        }
    }

    #[test]
    fn test_gguf_with_metadata() {
        let t = Tensor::from_vec(vec![1.0_f64, 2.0], vec![2]).unwrap();
        let file = GgufFile {
            metadata: vec![
                (
                    "model.name".to_string(),
                    GgufValue::String("test-model".to_string()),
                ),
                ("model.layers".to_string(), GgufValue::Uint32(12)),
                ("model.version".to_string(), GgufValue::Float32(1.5)),
            ],
            tensors: vec![("bias".to_string(), t)],
        };
        let tmp = TempFile::new("meta");
        save_gguf(tmp.path(), &file).unwrap();
        let loaded: GgufFile<f64> = load_gguf(tmp.path()).unwrap();
        assert_eq!(loaded.metadata.len(), 3);
        // All three values round-trip bit-exactly (1.5 is representable in f32).
        assert_eq!(loaded.metadata, file.metadata);
        assert_eq!(loaded.tensors.len(), 1);
        assert_eq!(loaded.tensors[0].0, "bias");
        assert_eq!(loaded.tensors[0].1.as_slice(), &[1.0, 2.0]);
    }

    #[test]
    fn test_gguf_multiple_tensors() {
        let t1 = Tensor::from_vec(vec![1.0_f64, 2.0, 3.0], vec![3]).unwrap();
        let t2 = Tensor::from_vec(vec![4.0_f64, 5.0, 6.0, 7.0], vec![2, 2]).unwrap();
        let t3 = Tensor::from_vec(vec![8.0_f64, 9.0], vec![1, 2]).unwrap();
        let file = GgufFile {
            metadata: vec![],
            tensors: vec![
                ("layer1.weight".to_string(), t1),
                ("layer1.bias".to_string(), t2),
                ("layer2.weight".to_string(), t3),
            ],
        };
        let tmp = TempFile::new("multi");
        save_gguf(tmp.path(), &file).unwrap();
        let loaded: GgufFile<f64> = load_gguf(tmp.path()).unwrap();
        assert_eq!(loaded.tensors.len(), 3);
        assert_eq!(loaded.tensors[0].0, "layer1.weight");
        assert_eq!(loaded.tensors[1].0, "layer1.bias");
        assert_eq!(loaded.tensors[2].0, "layer2.weight");
        assert_eq!(loaded.tensors[0].1.as_slice(), &[1.0, 2.0, 3.0]);
        assert_eq!(loaded.tensors[1].1.as_slice(), &[4.0, 5.0, 6.0, 7.0]);
        assert_eq!(loaded.tensors[2].1.as_slice(), &[8.0, 9.0]);
    }

    #[test]
    #[allow(clippy::approx_constant)]
    fn test_gguf_metadata_roundtrip() {
        let t = Tensor::from_vec(vec![0.0_f64], vec![1]).unwrap();
        let file = GgufFile {
            metadata: vec![
                ("key.uint32".to_string(), GgufValue::Uint32(42)),
                ("key.int32".to_string(), GgufValue::Int32(-7)),
                ("key.float32".to_string(), GgufValue::Float32(3.14)),
                (
                    "key.string".to_string(),
                    GgufValue::String("hello world".to_string()),
                ),
                ("key.uint64".to_string(), GgufValue::Uint64(1_000_000)),
            ],
            tensors: vec![("dummy".to_string(), t)],
        };
        let tmp = TempFile::new("kv_roundtrip");
        save_gguf(tmp.path(), &file).unwrap();
        let loaded: GgufFile<f64> = load_gguf(tmp.path()).unwrap();
        assert_eq!(loaded.metadata.len(), 5);
        assert_eq!(
            loaded.metadata[0],
            ("key.uint32".to_string(), GgufValue::Uint32(42))
        );
        assert_eq!(
            loaded.metadata[1],
            ("key.int32".to_string(), GgufValue::Int32(-7))
        );
        if let GgufValue::Float32(v) = loaded.metadata[2].1 {
            assert!((v - 3.14).abs() < 1e-5);
        } else {
            panic!("expected Float32");
        }
        assert_eq!(
            loaded.metadata[3],
            (
                "key.string".to_string(),
                GgufValue::String("hello world".to_string())
            )
        );
        assert_eq!(
            loaded.metadata[4],
            ("key.uint64".to_string(), GgufValue::Uint64(1_000_000))
        );
    }

    #[test]
    fn test_gguf_large_tensor() {
        let mut data = Vec::with_capacity(10_000);
        for i in 0..10_000 {
            data.push(f64::from(i) * 0.001);
        }
        let t = Tensor::from_vec(data.clone(), vec![100, 100]).unwrap();
        let file = GgufFile {
            metadata: vec![],
            tensors: vec![("big".to_string(), t)],
        };
        let tmp = TempFile::new("large");
        save_gguf(tmp.path(), &file).unwrap();
        let loaded: GgufFile<f64> = load_gguf(tmp.path()).unwrap();
        assert_eq!(loaded.tensors.len(), 1);
        assert_eq!(loaded.tensors[0].1.shape(), &[100, 100]);
        let loaded_data = loaded.tensors[0].1.as_slice();
        assert_eq!(loaded_data.len(), data.len());
        for (a, b) in data.iter().zip(loaded_data.iter()) {
            assert!((*a - *b).abs() < 1e-12);
        }
    }

    #[test]
    #[allow(clippy::float_cmp)]
    fn test_f16_to_f32_conversion() {
        // Normal numbers.
        let val = f16_to_f32(0x3C00);
        assert!((val - 1.0).abs() < 1e-6);
        let val = f16_to_f32(0xBC00);
        assert!((val - (-1.0)).abs() < 1e-6);
        assert!(f16_to_f32(0x7BFF) == 65504.0);

        // Signed zeros.
        let val = f16_to_f32(0x0000);
        assert!(val == 0.0);
        assert_eq!(f16_to_f32(0x8000).to_bits(), 0x8000_0000);

        // Subnormals: smallest (2^-24) and largest.
        assert_eq!(f16_to_f32(0x0001).to_bits(), 0x3380_0000);
        assert_eq!(f16_to_f32(0x03FF).to_bits(), 0x387F_C000);

        // Infinities and NaN.
        assert!(f16_to_f32(0x7C00) == f32::INFINITY);
        assert!(f16_to_f32(0xFC00) == f32::NEG_INFINITY);
        assert!(f16_to_f32(0x7E00).is_nan());
    }
}