impl GgufReader {
    /// Read and parse a GGUF model file from disk.
    ///
    /// # Errors
    /// Returns `AprenderError::Io` if the file cannot be opened or read, or a
    /// format error from [`Self::from_bytes`] if the contents are not valid GGUF.
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let mut file = File::open(path.as_ref()).map_err(AprenderError::Io)?;
        // Pre-size the buffer from file metadata so read_to_end does not
        // repeatedly grow-and-copy on multi-gigabyte model files. A metadata
        // failure is non-fatal: fall back to an empty initial capacity.
        let size = file.metadata().map(|m| m.len() as usize).unwrap_or(0);
        let mut data = Vec::with_capacity(size);
        file.read_to_end(&mut data).map_err(AprenderError::Io)?;
        Self::from_bytes(data)
    }

    /// Parse a GGUF model from an in-memory byte buffer.
    ///
    /// Header layout parsed here: bytes 0..4 magic, 4..8 version,
    /// 8..16 tensor_count, 16..24 metadata_kv_count, followed by the
    /// metadata KV section, the tensor-info section, and finally the
    /// alignment-padded tensor data region (its start is recorded in
    /// `data_offset`).
    ///
    /// # Errors
    /// Returns `AprenderError::FormatError` for a short buffer, a bad magic,
    /// implausible counts (corruption guard), or any truncated field reported
    /// by the low-level readers.
    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
        // The fixed header is 24 bytes; anything shorter cannot be GGUF.
        if data.len() < 24 {
            return Err(AprenderError::FormatError {
                message: "GGUF file too small (< 24 bytes)".to_string(),
            });
        }
        let magic = read_u32(&data, 0)?;
        if magic != GGUF_MAGIC {
            // Include the raw bytes and their printable ASCII so the user can
            // recognize common mistakes (e.g. an HTML error page or a ZIP).
            let magic_bytes = &data[0..4.min(data.len())];
            let magic_ascii: String = magic_bytes
                .iter()
                .map(|&b| if b.is_ascii_graphic() { b as char } else { '.' })
                .collect();
            return Err(AprenderError::FormatError {
                message: format!(
                    "Invalid GGUF magic: 0x{magic:08X} (bytes: {magic_bytes:02X?}, ascii: \"{magic_ascii}\"), \
expected 0x{GGUF_MAGIC:08X} (\"GGUF\")"
                ),
            });
        }
        // NOTE(review): `version` is stored but never validated. GGUF v1 uses
        // 32-bit counts here, which this u64-based parse would misread — worth
        // confirming whether a version check should reject v1 files.
        let version = read_u32(&data, 4)?;
        let tensor_count = read_u64(&data, 8)?;
        let metadata_kv_count = read_u64(&data, 16)?;
        // Sanity-cap both counts BEFORE using them for allocation sizes so a
        // corrupted or malicious header cannot trigger a huge reservation.
        if tensor_count > MAX_TENSOR_COUNT {
            return Err(AprenderError::FormatError {
                message: format!(
                    "GGUF tensor_count {tensor_count} exceeds maximum allowed {MAX_TENSOR_COUNT} (possible corrupted/malicious file)"
                ),
            });
        }
        if metadata_kv_count > MAX_METADATA_COUNT {
            return Err(AprenderError::FormatError {
                message: format!(
                    "GGUF metadata_kv_count {metadata_kv_count} exceeds maximum allowed {MAX_METADATA_COUNT} (possible corrupted/malicious file)"
                ),
            });
        }

        // Only keys under these prefixes are decoded and retained; all other
        // metadata values are skipped over to keep memory usage low.
        const KEPT_PREFIXES: [&str; 8] = [
            "tokenizer.",
            "general.",
            "llama.",
            "qwen2.",
            "qwen3.",
            "phi.",
            "mistral.",
            "gpt2.",
        ];

        let mut offset = 24;
        let mut metadata = BTreeMap::new();
        for _ in 0..metadata_kv_count {
            let (key, key_len) = read_string(&data, offset)?;
            offset += key_len;
            let value_type = read_u32(&data, offset)?;
            offset += 4;
            if KEPT_PREFIXES.iter().any(|p| key.starts_with(p)) {
                let (value, value_len) = read_metadata_value(&data, offset, value_type)?;
                offset += value_len;
                metadata.insert(key, value);
            } else {
                // Still must walk past the value bytes to stay in sync.
                offset += skip_metadata_value(&data, offset, value_type)?;
            }
        }

        // Tensor-info section: name, dim count, dims, dtype, data offset.
        let mut tensors = Vec::with_capacity(tensor_count as usize);
        for _ in 0..tensor_count {
            let (name, name_len) = read_string(&data, offset)?;
            offset += name_len;
            let n_dims = read_u32(&data, offset)?;
            offset += 4;
            if n_dims > MAX_DIMS {
                return Err(AprenderError::FormatError {
                    message: format!(
                        "Tensor '{name}' has {n_dims} dimensions, exceeds maximum {MAX_DIMS} (possible corrupted file)"
                    ),
                });
            }
            let mut dims = Vec::with_capacity(n_dims as usize);
            for _ in 0..n_dims {
                dims.push(read_u64(&data, offset)?);
                offset += 8;
            }
            let dtype = read_u32(&data, offset)?;
            offset += 4;
            let tensor_offset = read_u64(&data, offset)?;
            offset += 8;
            tensors.push(GgufTensorMeta {
                name,
                dims,
                dtype,
                offset: tensor_offset,
            });
        }

        // Tensor data starts at the next alignment boundary after the header.
        let padding = padding_for_alignment(offset, GGUF_DEFAULT_ALIGNMENT);
        let data_offset = offset + padding;
        Ok(Self {
            data,
            version,
            tensor_count,
            tensors,
            data_offset,
            metadata,
        })
    }

    // ---- private metadata lookup helpers (shared by the accessors below) ----

    /// Metadata value that must be a `String`; `None` on absent or wrong type.
    fn meta_string(&self, key: &str) -> Option<String> {
        match self.metadata.get(key) {
            Some(GgufValue::String(s)) => Some(s.clone()),
            _ => None,
        }
    }

    /// Metadata value that must be a `Uint32`.
    fn meta_u32(&self, key: &str) -> Option<u32> {
        match self.metadata.get(key) {
            Some(GgufValue::Uint32(v)) => Some(*v),
            _ => None,
        }
    }

    /// Metadata value that must be a `Float32`.
    fn meta_f32(&self, key: &str) -> Option<f32> {
        match self.metadata.get(key) {
            Some(GgufValue::Float32(v)) => Some(*v),
            _ => None,
        }
    }

    /// Integer metadata stored as either `Uint32` or `Uint64`, widened to `usize`.
    fn meta_usize(&self, key: &str) -> Option<usize> {
        match self.metadata.get(key) {
            Some(GgufValue::Uint32(v)) => Some(*v as usize),
            Some(GgufValue::Uint64(v)) => Some(*v as usize),
            _ => None,
        }
    }

    /// Architecture-scoped key, e.g. "llama.block_count". Falls back to
    /// "llama" when `general.architecture` is absent (matching prior behavior).
    fn arch_key(&self, suffix: &str) -> String {
        let arch = self.architecture().unwrap_or_else(|| "llama".to_string());
        format!("{arch}.{suffix}")
    }

    // ---- public metadata accessors ----

    /// Token strings from `tokenizer.ggml.tokens`; `None` if absent or empty.
    #[must_use]
    pub fn vocabulary(&self) -> Option<Vec<String>> {
        match self.metadata.get("tokenizer.ggml.tokens") {
            Some(GgufValue::ArrayString(tokens)) if !tokens.is_empty() => Some(tokens.clone()),
            _ => None,
        }
    }

    /// Tokenizer model identifier (e.g. "gpt2", "llama").
    #[must_use]
    pub fn tokenizer_model(&self) -> Option<String> {
        self.meta_string("tokenizer.ggml.model")
    }

    /// Beginning-of-sequence token id.
    #[must_use]
    pub fn bos_token_id(&self) -> Option<u32> {
        self.meta_u32("tokenizer.ggml.bos_token_id")
    }

    /// End-of-sequence token id.
    #[must_use]
    pub fn eos_token_id(&self) -> Option<u32> {
        self.meta_u32("tokenizer.ggml.eos_token_id")
    }

    /// BPE merge rules from `tokenizer.ggml.merges`; `None` if absent or empty.
    #[must_use]
    pub fn merges(&self) -> Option<Vec<String>> {
        match self.metadata.get("tokenizer.ggml.merges") {
            Some(GgufValue::ArrayString(merges)) if !merges.is_empty() => Some(merges.clone()),
            _ => None,
        }
    }

    /// Model architecture string from `general.architecture`.
    #[must_use]
    pub fn architecture(&self) -> Option<String> {
        self.meta_string("general.architecture")
    }

    /// Human-readable model name from `general.name`.
    #[must_use]
    pub fn model_name(&self) -> Option<String> {
        self.meta_string("general.name")
    }

    /// Embedding width (`{arch}.embedding_length`).
    #[must_use]
    pub fn hidden_size(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("embedding_length"))
    }

    /// Transformer block count (`{arch}.block_count`).
    #[must_use]
    pub fn num_layers(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("block_count"))
    }

    /// Attention head count (`{arch}.attention.head_count`).
    #[must_use]
    pub fn num_heads(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("attention.head_count"))
    }

    /// KV head count; falls back to `num_heads()` when the key is absent
    /// (i.e. plain multi-head attention rather than GQA).
    #[must_use]
    pub fn num_kv_heads(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("attention.head_count_kv"))
            .or_else(|| self.num_heads())
    }

    /// Vocabulary size from `{arch}.vocab_size`, else the token list length.
    #[must_use]
    pub fn vocab_size(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("vocab_size"))
            .or_else(|| self.vocabulary().map(|v| v.len()))
    }

    /// Feed-forward width (`{arch}.feed_forward_length`).
    #[must_use]
    pub fn intermediate_size(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("feed_forward_length"))
    }

    /// Maximum context length (`{arch}.context_length`).
    #[must_use]
    pub fn context_length(&self) -> Option<usize> {
        self.meta_usize(&self.arch_key("context_length"))
    }

    /// RoPE base frequency (`{arch}.rope.freq_base`). Some writers store this
    /// as an integer, so a `Uint32` is accepted and widened to f32.
    #[must_use]
    pub fn rope_theta(&self) -> Option<f32> {
        let key = self.arch_key("rope.freq_base");
        match self.metadata.get(&key) {
            Some(GgufValue::Float32(v)) => Some(*v),
            Some(GgufValue::Uint32(v)) => Some(*v as f32),
            _ => None,
        }
    }

    /// RMSNorm epsilon, falling back to the plain LayerNorm epsilon key for
    /// architectures that only write `layer_norm_epsilon`.
    #[must_use]
    pub fn rms_norm_eps(&self) -> Option<f32> {
        self.meta_f32(&self.arch_key("attention.layer_norm_rms_epsilon"))
            .or_else(|| self.meta_f32(&self.arch_key("attention.layer_norm_epsilon")))
    }

    /// Per-token type flags from `tokenizer.ggml.token_type`; `None` if
    /// absent or empty.
    #[must_use]
    pub fn token_type(&self) -> Option<Vec<i32>> {
        match self.metadata.get("tokenizer.ggml.token_type") {
            Some(GgufValue::ArrayInt32(types)) if !types.is_empty() => Some(types.clone()),
            _ => None,
        }
    }

    /// Padding token id.
    #[must_use]
    pub fn padding_token_id(&self) -> Option<u32> {
        self.meta_u32("tokenizer.ggml.padding_token_id")
    }

    /// Whether a BOS token should be prepended during tokenization.
    #[must_use]
    pub fn add_bos_token(&self) -> Option<bool> {
        match self.metadata.get("tokenizer.ggml.add_bos_token") {
            Some(GgufValue::Bool(v)) => Some(*v),
            _ => None,
        }
    }

    /// Raw chat template string from `tokenizer.chat_template`.
    #[must_use]
    pub fn chat_template(&self) -> Option<String> {
        self.meta_string("tokenizer.chat_template")
    }

    /// Pre-tokenizer identifier from `tokenizer.ggml.pre`.
    #[must_use]
    pub fn pre_tokenizer_type(&self) -> Option<String> {
        self.meta_string("tokenizer.ggml.pre")
    }
}