use thiserror::Error;
/// Four-byte magic value that `check_gguf_header` expects at the start of the input.
const GGUF_MAGIC_BYTES: &[u8; 4] = b"GGUF";
/// Minimum number of bytes `check_gguf_header` needs: 4 magic bytes followed by a 4-byte version.
const GGUF_MIN_HEADER_BYTES: usize = 8;
/// GGUF format versions recognised by this module.
///
/// Each variant's explicit discriminant equals its numeric version, and the
/// derived ordering follows it (`V1 < V2 < V3`), so versions can be compared
/// with `<` / `>=`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum GgufVersion {
/// Format version 1.
V1 = 1,
/// Format version 2.
V2 = 2,
/// Format version 3.
V3 = 3,
}
impl GgufVersion {
    /// Converts a raw version number into a [`GgufVersion`].
    ///
    /// Returns `None` for every value other than 1, 2 or 3.
    pub fn from_u32(v: u32) -> Option<Self> {
        [Self::V1, Self::V2, Self::V3]
            .iter()
            .copied()
            .find(|candidate| candidate.to_u32() == v)
    }

    /// Returns the numeric version (1, 2 or 3).
    pub fn to_u32(self) -> u32 {
        match self {
            Self::V1 => 1,
            Self::V2 => 2,
            Self::V3 => 3,
        }
    }

    /// Returns `true` for version 2 and later.
    ///
    /// NOTE(review): the name implies this gates f16 key/value handling;
    /// the code here only establishes the version threshold.
    pub fn supports_f16_kv(&self) -> bool {
        self.to_u32() >= Self::V2.to_u32()
    }

    /// Returns `true` for version 3 and later.
    ///
    /// NOTE(review): the name implies this gates aligned tensor data;
    /// the code here only establishes the version threshold.
    pub fn supports_aligned_tensors(&self) -> bool {
        self.to_u32() >= Self::V3.to_u32()
    }
}
impl std::fmt::Display for GgufVersion {
    /// Writes the version as `v` followed by its number, e.g. `v3`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let number = self.to_u32();
        f.write_fmt(format_args!("v{}", number))
    }
}
/// Tensor quantization types this module can name, plus a catch-all for
/// unrecognised type ids.
///
/// The numeric ids quoted on each variant are the ones used by
/// `ExtendedQuantType::from_u32` and `ExtendedQuantType::to_u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// Variant names keep the underscore-separated spelling of the format names (e.g. `Q4_0`).
#[allow(non_camel_case_types)]
pub enum ExtendedQuantType {
/// Type id 0.
F32,
/// Type id 1.
F16,
/// Type id 2.
Q4_0,
/// Type id 3.
Q4_1,
/// Type id 6.
Q5_0,
/// Type id 7.
Q5_1,
/// Type id 8.
Q8_0,
/// Type id 9.
Q8_1,
/// Type id 10.
Q2_K,
/// Type id 11.
Q3_K,
/// Type id 12.
Q4_K,
/// Type id 13.
Q5_K,
/// Type id 14.
Q6_K,
/// Type id 15.
Q8_K,
/// Type id 41.
Q1_0_G128,
/// Any type id without a named variant; carries the raw id unchanged.
Unknown(u32),
}
impl ExtendedQuantType {
    /// Maps a raw type id to its variant.
    ///
    /// Never fails: ids without a named variant (including 4 and 5) are
    /// returned as [`ExtendedQuantType::Unknown`] carrying the id.
    pub fn from_u32(v: u32) -> Self {
        match v {
            0 => Self::F32,
            1 => Self::F16,
            2 => Self::Q4_0,
            3 => Self::Q4_1,
            6 => Self::Q5_0,
            7 => Self::Q5_1,
            8 => Self::Q8_0,
            9 => Self::Q8_1,
            10 => Self::Q2_K,
            11 => Self::Q3_K,
            12 => Self::Q4_K,
            13 => Self::Q5_K,
            14 => Self::Q6_K,
            15 => Self::Q8_K,
            41 => Self::Q1_0_G128,
            other => Self::Unknown(other),
        }
    }

    /// Returns the raw type id; the inverse of [`ExtendedQuantType::from_u32`].
    ///
    /// `Unknown(id)` yields `id`, so `from_u32(x).to_u32() == x` for every `x`.
    pub fn to_u32(self) -> u32 {
        match self {
            Self::F32 => 0,
            Self::F16 => 1,
            Self::Q4_0 => 2,
            Self::Q4_1 => 3,
            Self::Q5_0 => 6,
            Self::Q5_1 => 7,
            Self::Q8_0 => 8,
            Self::Q8_1 => 9,
            Self::Q2_K => 10,
            Self::Q3_K => 11,
            Self::Q4_K => 12,
            Self::Q5_K => 13,
            Self::Q6_K => 14,
            Self::Q8_K => 15,
            Self::Q1_0_G128 => 41,
            Self::Unknown(id) => id,
        }
    }

    /// Average storage cost of one weight, in bits.
    ///
    /// For the block-quantized formats the figure is
    /// `block size in bits / weights per block`, so it includes the per-block
    /// scale and offset fields. Returns `0.0` for
    /// [`ExtendedQuantType::Unknown`], whose layout is not known.
    pub fn bits_per_weight(self) -> f32 {
        match self {
            Self::F32 => 32.0,
            Self::F16 => 16.0,
            Self::Q4_0 => 4.5,
            Self::Q4_1 => 5.0,
            Self::Q5_0 => 5.5,
            Self::Q5_1 => 6.0,
            Self::Q8_0 => 8.5,
            // A Q8_1 block stores two f16 fields (scale and scaled sum) next
            // to its 32 int8 quants: 36 bytes per 32 weights = 9 bits each,
            // half a bit more than Q8_0's single-scale block.
            Self::Q8_1 => 9.0,
            Self::Q2_K => 2.625,
            Self::Q3_K => 3.4375,
            Self::Q4_K => 4.5,
            Self::Q5_K => 5.5,
            Self::Q6_K => 6.5625,
            Self::Q8_K => 9.125,
            Self::Q1_0_G128 => 1.125,
            Self::Unknown(_) => 0.0,
        }
    }

    /// Returns `true` for every variant except [`ExtendedQuantType::Unknown`].
    pub fn is_known(self) -> bool {
        !matches!(self, Self::Unknown(_))
    }

    /// Returns the variant's name as a static string.
    ///
    /// Every `Unknown` value yields the same `"Unknown"` string; use the
    /// `Display` implementation when the raw id should be shown.
    pub fn name(self) -> &'static str {
        match self {
            Self::F32 => "F32",
            Self::F16 => "F16",
            Self::Q4_0 => "Q4_0",
            Self::Q4_1 => "Q4_1",
            Self::Q5_0 => "Q5_0",
            Self::Q5_1 => "Q5_1",
            Self::Q8_0 => "Q8_0",
            Self::Q8_1 => "Q8_1",
            Self::Q2_K => "Q2_K",
            Self::Q3_K => "Q3_K",
            Self::Q4_K => "Q4_K",
            Self::Q5_K => "Q5_K",
            Self::Q6_K => "Q6_K",
            Self::Q8_K => "Q8_K",
            Self::Q1_0_G128 => "Q1_0_G128",
            Self::Unknown(_) => "Unknown",
        }
    }
}
impl std::fmt::Display for ExtendedQuantType {
    /// Writes the variant name; unknown types are written as `Unknown(<id>)`
    /// so the raw id stays visible.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Self::Unknown(raw_id) = self {
            return write!(f, "Unknown({})", raw_id);
        }
        f.write_str(self.name())
    }
}
/// Outcome of a compatibility check over a GGUF file's version, counts and
/// tensor quantization type ids.
#[derive(Debug, Clone)]
pub struct GgufCompatReport {
/// Format version the report was created with.
pub version: GgufVersion,
/// Tensor count as passed to `GgufCompatReport::new`.
pub tensor_count: u64,
/// Metadata entry count as passed to `GgufCompatReport::new`.
pub metadata_count: u64,
/// Raw ids recorded through `add_unknown_quant`, one entry per call (duplicates are kept).
pub unknown_quant_types: Vec<u32>,
/// Human-readable warnings accumulated for this report.
pub warnings: Vec<String>,
/// Starts as `true`; `finalize` sets it to `false` when any unknown quantization id was recorded.
pub is_loadable: bool,
}
impl GgufCompatReport {
    /// Creates an empty report: no warnings, no unknown quantization ids,
    /// and `is_loadable` set to `true`.
    pub fn new(version: GgufVersion, tensor_count: u64, metadata_count: u64) -> Self {
        Self {
            version,
            tensor_count,
            metadata_count,
            unknown_quant_types: Vec::new(),
            warnings: Vec::new(),
            is_loadable: true,
        }
    }

    /// Appends a warning message to the report.
    pub fn add_warning(&mut self, msg: impl Into<String>) {
        self.warnings.push(msg.into());
    }

    /// Records the raw id of an unrecognised quantization type.
    ///
    /// Ids are not de-duplicated: each call adds one entry.
    pub fn add_unknown_quant(&mut self, quant_id: u32) {
        self.unknown_quant_types.push(quant_id);
    }

    /// Derives the final verdict from the recorded unknown quantization ids.
    ///
    /// When at least one id was recorded, the report is marked not loadable
    /// and a warning listing the ids is added. Calling this again without
    /// recording further ids leaves the report unchanged, so the warning is
    /// never duplicated.
    pub fn finalize(&mut self) {
        if self.unknown_quant_types.is_empty() {
            return;
        }
        self.is_loadable = false;
        let message = format!(
            "file contains {} tensor(s) with unrecognised quantization type(s): {:?}",
            self.unknown_quant_types.len(),
            self.unknown_quant_types,
        );
        // Skip the push when this exact warning is already present, which
        // makes repeated `finalize` calls idempotent.
        if !self.warnings.contains(&message) {
            self.warnings.push(message);
        }
    }

    /// Returns a one-line summary of the report: version, counts, number of
    /// unknown quantization entries, number of warnings, and the verdict.
    pub fn summary(&self) -> String {
        format!(
            "GGUF {} | tensors={} metadata={} | unknown_quants={} | warnings={} | loadable={}",
            self.version,
            self.tensor_count,
            self.metadata_count,
            self.unknown_quant_types.len(),
            self.warnings.len(),
            self.is_loadable,
        )
    }
}
/// Validates the start of a GGUF file and returns its format version.
///
/// The input must begin with the 4-byte magic followed by a little-endian
/// `u32` version. Bytes beyond the first eight are ignored.
///
/// # Errors
///
/// * [`CompatError::TruncatedHeader`] when fewer than `GGUF_MIN_HEADER_BYTES`
///   bytes are supplied (checked before the magic is inspected).
/// * [`CompatError::InvalidMagic`] when the first four bytes are not the
///   expected magic; the error carries those four bytes.
/// * [`CompatError::UnsupportedVersion`] when the version number has no
///   matching [`GgufVersion`].
pub fn check_gguf_header(bytes: &[u8]) -> Result<GgufVersion, CompatError> {
    let truncated = || CompatError::TruncatedHeader {
        need: GGUF_MIN_HEADER_BYTES,
        got: bytes.len(),
    };
    if bytes.len() < GGUF_MIN_HEADER_BYTES {
        return Err(truncated());
    }

    let (magic, rest) = bytes.split_at(4);
    if magic != GGUF_MAGIC_BYTES {
        return Err(CompatError::InvalidMagic(magic.to_vec()));
    }

    // The length guard above already guarantees four more bytes; the `None`
    // arm only keeps this function free of panicking paths.
    let mut version_bytes = [0u8; 4];
    match rest.get(..4) {
        Some(raw) => version_bytes.copy_from_slice(raw),
        None => return Err(truncated()),
    }

    let raw_version = u32::from_le_bytes(version_bytes);
    match GgufVersion::from_u32(raw_version) {
        Some(version) => Ok(version),
        None => Err(CompatError::UnsupportedVersion(raw_version)),
    }
}
/// Builds a finalized [`GgufCompatReport`] from already-parsed header values.
///
/// * `version_u32` - raw version number. An unrecognised value does not fail:
///   the report is created as v3 and a warning is recorded.
/// * `tensor_count`, `metadata_count` - copied into the report unchanged.
/// * `tensor_quant_type_ids` - raw quantization type ids. Every id that maps
///   to [`ExtendedQuantType::Unknown`] is recorded, duplicates included.
///
/// The report is finalized before it is returned.
pub fn build_compat_report(
    version_u32: u32,
    tensor_count: u64,
    metadata_count: u64,
    tensor_quant_type_ids: &[u32],
) -> GgufCompatReport {
    let recognised = GgufVersion::from_u32(version_u32);
    let mut report = GgufCompatReport::new(
        recognised.unwrap_or(GgufVersion::V3),
        tensor_count,
        metadata_count,
    );

    if recognised.is_none() {
        report.add_warning(format!(
            "GGUF version {} is not explicitly supported; treating as v3 for structural parsing",
            version_u32
        ));
    }

    let unrecognised_ids = tensor_quant_type_ids
        .iter()
        .copied()
        .filter(|&id| !ExtendedQuantType::from_u32(id).is_known());
    for id in unrecognised_ids {
        report.add_unknown_quant(id);
    }

    report.finalize();
    report
}
/// Errors returned by `check_gguf_header`.
#[derive(Debug, Error)]
pub enum CompatError {
/// The first four bytes were not the GGUF magic; carries the bytes that were found.
#[error("invalid GGUF magic: expected GGUF, got {0:?}")]
InvalidMagic(Vec<u8>),
/// The version field held a value with no matching `GgufVersion`; carries that value.
#[error("unsupported GGUF version: {0}")]
UnsupportedVersion(u32),
/// Fewer bytes were supplied (`got`) than the minimum header size (`need`).
#[error("truncated header: need at least {need} bytes, got {got}")]
TruncatedHeader { need: usize, got: usize },
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `to_u32` yields the numeric version and `from_u32` maps it back.
    #[test]
    fn gguf_version_round_trips_to_u32() {
        assert_eq!(GgufVersion::V1.to_u32(), 1);
        assert_eq!(GgufVersion::V2.to_u32(), 2);
        assert_eq!(GgufVersion::V3.to_u32(), 3);
        for version in [GgufVersion::V1, GgufVersion::V2, GgufVersion::V3].iter().copied() {
            assert_eq!(GgufVersion::from_u32(version.to_u32()), Some(version));
        }
        // Values outside 1..=3 have no variant.
        assert_eq!(GgufVersion::from_u32(0), None);
        assert_eq!(GgufVersion::from_u32(4), None);
    }

    /// Versions display as `v` followed by their number.
    #[test]
    fn gguf_version_display_format() {
        assert_eq!(GgufVersion::V1.to_string(), "v1");
        assert_eq!(GgufVersion::V2.to_string(), "v2");
        assert_eq!(GgufVersion::V3.to_string(), "v3");
    }

    /// The raw id of an unknown quantization type must be visible when displayed.
    #[test]
    fn extended_quant_unknown_display_includes_id() {
        let qt = ExtendedQuantType::Unknown(999);
        assert!(qt.to_string().contains("999"));
    }

    /// Type ids survive a `from_u32` -> `to_u32` round trip, named or not.
    #[test]
    fn extended_quant_roundtrip_to_u32() {
        assert_eq!(ExtendedQuantType::F32.to_u32(), 0);
        assert_eq!(ExtendedQuantType::F16.to_u32(), 1);
        assert_eq!(ExtendedQuantType::Q1_0_G128.to_u32(), 41);
        assert_eq!(ExtendedQuantType::Unknown(99).to_u32(), 99);
        assert_eq!(ExtendedQuantType::from_u32(0), ExtendedQuantType::F32);
        assert_eq!(ExtendedQuantType::from_u32(41), ExtendedQuantType::Q1_0_G128);
        assert_eq!(ExtendedQuantType::from_u32(99), ExtendedQuantType::Unknown(99));
        for id in 0u32..64 {
            assert_eq!(ExtendedQuantType::from_u32(id).to_u32(), id);
        }
    }
}