infa-gguf 0.0.1

A minimal Rust machine learning library (work in progress).
Documentation
mod quant_block_utils;
mod quant_blocks;
mod tensor;
mod typing;

use std::io::Read;

pub(crate) use quant_block_utils::*;
pub use quant_blocks::*;
pub use tensor::*;
pub use typing::*;

/// Parser state for reading a GGUF stream from any [`Read`] source.
///
/// Build one with `GGUFParser::new` and consume it with `parse`.
pub struct GGUFParser<R: Read> {
    /// Byte source the parser pulls data from.
    bytes: R,
    /// Running count of bytes consumed from `bytes`; starts at zero.
    offset: usize,
}

impl<R> GGUFParser<R>
where
    R: Read,
{
    /// Upper bound on how many elements (or bytes) are reserved up front for a
    /// length that was read from the input.
    ///
    /// Counts in the file are untrusted: reserving exactly what the file claims
    /// would let a tiny malformed file trigger a huge allocation (or a capacity
    /// overflow panic). Collections still grow past this bound as real data
    /// arrives.
    const MAX_PREALLOC: usize = 4096;

    /// Creates a parser that reads from `bytes`, starting at offset zero.
    pub fn new(bytes: R) -> Self {
        Self { bytes, offset: 0 }
    }

    /// Builds an `InvalidData` I/O error carrying `message`.
    fn invalid_data(message: &'static str) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::InvalidData, message)
    }

    /// Clamps an untrusted element count to a safe preallocation size.
    fn capacity_hint(count: u64) -> usize {
        usize::try_from(count).map_or(Self::MAX_PREALLOC, |n| n.min(Self::MAX_PREALLOC))
    }

    /// Rounds `value` up to the next multiple of `alignment`.
    ///
    /// # Errors
    ///
    /// Fails if `alignment` is zero or if the rounded value overflows `u64`.
    fn align_up(value: u64, alignment: u64) -> crate::Result<u64> {
        if alignment == 0 {
            return Err(Self::invalid_data("alignment must be non-zero").into());
        }
        let padding = (alignment - value % alignment) % alignment;
        let aligned = value
            .checked_add(padding)
            .ok_or_else(|| Self::invalid_data("aligned offset overflows u64"))?;
        Ok(aligned)
    }

    /// Reads exactly `N` bytes into a stack array and advances the offset.
    fn read_array<const N: usize>(&mut self) -> crate::Result<[u8; N]> {
        let mut buf = [0u8; N];
        self.bytes.read_exact(&mut buf)?;
        self.offset += N;
        Ok(buf)
    }

    /// Reads a little-endian `u32`.
    fn read_u32(&mut self) -> crate::Result<u32> {
        Ok(u32::from_le_bytes(self.read_array::<4>()?))
    }

    /// Reads a little-endian `u64`.
    fn read_u64(&mut self) -> crate::Result<u64> {
        Ok(u64::from_le_bytes(self.read_array::<8>()?))
    }

    /// Reads exactly `len` bytes and advances the offset by `len`.
    ///
    /// The buffer grows as bytes actually arrive, so a bogus `len` in a
    /// truncated file produces an error instead of a huge up-front allocation.
    ///
    /// # Errors
    ///
    /// Fails with an I/O error if the reader fails or ends before `len` bytes
    /// were read.
    fn read_bytes(&mut self, len: usize) -> crate::Result<Vec<u8>> {
        let mut buf = Vec::with_capacity(len.min(Self::MAX_PREALLOC));
        // usize -> u64 is lossless on every supported target.
        self.bytes.by_ref().take(len as u64).read_to_end(&mut buf)?;
        if buf.len() != len {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "failed to fill whole buffer",
            )
            .into());
        }
        self.offset += len;
        Ok(buf)
    }

    /// Reads a string: a little-endian `u64` byte length followed by that many
    /// bytes, which must be valid UTF-8.
    ///
    /// # Errors
    ///
    /// Fails on I/O errors, on a length that does not fit in `usize`, or on
    /// invalid UTF-8.
    fn read_string(&mut self) -> crate::Result<String> {
        let len = self.read_u64()?;
        let len = usize::try_from(len)
            .map_err(|_| Self::invalid_data("string length does not fit in usize"))?;
        let str_bytes = self.read_bytes(len)?;
        Ok(String::from_utf8(str_bytes)?)
    }

    /// Reads one metadata value of type `ty`. All scalars are little-endian.
    ///
    /// An array is encoded as a `u32` element type, a `u64` element count, and
    /// then that many values of the element type.
    ///
    /// NOTE: nested arrays recurse once per nesting level and no depth limit is
    /// enforced here.
    ///
    /// # Errors
    ///
    /// Fails on I/O errors, on an unknown element type tag, or on an invalid
    /// string.
    fn read_metadata_value(
        &mut self,
        ty: &GGUFMetadataValueType,
    ) -> crate::Result<GGUFMetadataValue> {
        Ok(match ty {
            GGUFMetadataValueType::Bool => GGUFMetadataValue::Bool(self.read_array::<1>()?[0] != 0),
            GGUFMetadataValueType::Uint8 => GGUFMetadataValue::Uint8(self.read_array::<1>()?[0]),
            GGUFMetadataValueType::Int8 => {
                GGUFMetadataValue::Int8(i8::from_le_bytes(self.read_array::<1>()?))
            }
            GGUFMetadataValueType::Uint16 => {
                GGUFMetadataValue::Uint16(u16::from_le_bytes(self.read_array::<2>()?))
            }
            GGUFMetadataValueType::Int16 => {
                GGUFMetadataValue::Int16(i16::from_le_bytes(self.read_array::<2>()?))
            }
            GGUFMetadataValueType::Uint32 => GGUFMetadataValue::Uint32(self.read_u32()?),
            GGUFMetadataValueType::Int32 => {
                GGUFMetadataValue::Int32(i32::from_le_bytes(self.read_array::<4>()?))
            }
            GGUFMetadataValueType::String => GGUFMetadataValue::String(self.read_string()?),
            GGUFMetadataValueType::Array => {
                let ty = GGUFMetadataValueType::try_from(self.read_u32()?)?;
                let len = self.read_u64()?;
                // `len` is untrusted: reserve a bounded amount and let the
                // vector grow as elements are actually decoded.
                let mut values = Vec::with_capacity(Self::capacity_hint(len));
                for _ in 0..len {
                    values.push(self.read_metadata_value(&ty)?);
                }
                GGUFMetadataValue::Array(values)
            }
            GGUFMetadataValueType::Uint64 => GGUFMetadataValue::Uint64(self.read_u64()?),
            GGUFMetadataValueType::Int64 => {
                GGUFMetadataValue::Int64(i64::from_le_bytes(self.read_array::<8>()?))
            }
            GGUFMetadataValueType::Float64 => {
                GGUFMetadataValue::Float64(f64::from_le_bytes(self.read_array::<8>()?))
            }
            GGUFMetadataValueType::Float32 => {
                GGUFMetadataValue::Float32(f32::from_le_bytes(self.read_array::<4>()?))
            }
        })
    }

    /// Reads one metadata entry: a string key, a `u32` value type tag, and the
    /// value itself.
    fn read_metadata_kv(&mut self) -> crate::Result<GGUFMetadataKv> {
        let key = self.read_string()?;
        let value_type = GGUFMetadataValueType::try_from(self.read_u32()?)?;
        let value = self.read_metadata_value(&value_type)?;
        Ok(GGUFMetadataKv {
            key,
            value_type,
            value,
        })
    }

    /// Reads the file header: the `GGUF` magic, a `u32` version, a `u64` tensor
    /// count, a `u64` metadata entry count, and then the metadata entries.
    ///
    /// # Errors
    ///
    /// Returns `Error::MagicMismatch` if the first four bytes are not `GGUF`,
    /// and propagates any error from reading the metadata entries.
    fn read_header(&mut self) -> crate::Result<GGUFHeader> {
        let magic = self.read_array::<4>()?;
        if magic != *b"GGUF" {
            return Err(crate::Error::MagicMismatch);
        }
        let version = self.read_u32()?;
        let tensor_count = self.read_u64()?;
        let metadata_kv_count = self.read_u64()?;
        // The count is untrusted, so only a bounded amount is reserved.
        let mut metadata_kv = Vec::with_capacity(Self::capacity_hint(metadata_kv_count));
        for _ in 0..metadata_kv_count {
            metadata_kv.push(self.read_metadata_kv()?);
        }
        Ok(GGUFHeader {
            version,
            tensor_count,
            metadata_kv_count,
            metadata_kv,
        })
    }

    /// Reads one tensor description: name, `u32` dimension count, one `u64`
    /// per dimension, a `u32` data type tag, and a `u64` data offset.
    ///
    /// The stored offset is rounded up to a multiple of `alignment`.
    ///
    /// # Errors
    ///
    /// Fails on I/O errors, on an unknown data type tag, on a zero
    /// `alignment`, or if rounding the offset overflows `u64`.
    fn read_tensor(&mut self, alignment: u64) -> crate::Result<GGUFTensorMeta> {
        let name = self.read_string()?;
        let n_dimensions = self.read_u32()?;
        let mut shape = Vec::with_capacity(Self::capacity_hint(u64::from(n_dimensions)));
        for _ in 0..n_dimensions {
            shape.push(self.read_u64()?);
        }
        let data_type = self.read_u32()?;
        let offset = self.read_u64()?;
        let offset = Self::align_up(offset, alignment)?;
        Ok(GGUFTensorMeta {
            name,
            shape,
            data_type: GGMLType::try_from(data_type)?,
            offset,
        })
    }

    /// Parses the header and all tensor descriptions, consuming the parser.
    ///
    /// The alignment comes from the `general.alignment` metadata entry and
    /// defaults to 32 when that entry is absent. The returned `offset` is the
    /// number of bytes consumed so far, rounded up to the alignment. The
    /// padding bytes themselves are not read from the reader before it is
    /// handed over as `tensor_bytes`.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidAlignmentMetaType` if `general.alignment` is not
    /// a `Uint32`, an invalid-data I/O error if it is zero, and propagates any
    /// error from reading the header or the tensor descriptions.
    pub fn parse(mut self) -> crate::Result<GGUF<R>> {
        let header = self.read_header()?;
        let alignment = match header
            .metadata_kv
            .iter()
            .find(|kv| kv.key == "general.alignment")
        {
            Some(kv) => match &kv.value {
                GGUFMetadataValue::Uint32(v) => u64::from(*v),
                other => return Err(crate::Error::InvalidAlignmentMetaType(other.clone())),
            },
            None => 32,
        };
        // A zero alignment would otherwise divide by zero in the rounding below.
        if alignment == 0 {
            return Err(Self::invalid_data("general.alignment must be non-zero").into());
        }
        let mut tensors = Vec::with_capacity(Self::capacity_hint(header.tensor_count));
        for _ in 0..header.tensor_count {
            tensors.push(self.read_tensor(alignment)?);
        }
        let offset = Self::align_up(self.offset as u64, alignment)?;
        Ok(GGUF {
            header,
            tensors,
            tensor_bytes: self.bytes,
            offset,
        })
    }
}