use std::fs::File;
use std::io::Cursor;
use std::path::Path;
use candle_core::quantized::gguf_file::{Content, TensorInfo, Value};
use candle_core::quantized::QTensor;
use candle_core::{Device, Error as CandleError, Result as CandleResult};
use memmap2::Mmap;
/// A GGUF file kept memory-mapped together with its parsed header.
///
/// The header (`content`) is parsed once when the file is opened; tensor data
/// is later read through the same mapping.
pub struct GgufFile {
/// Memory map of the whole file; all byte-level accessors slice into it.
mmap: Mmap,
/// Header parsed from the mapped bytes: metadata, tensor infos and the
/// offset at which the tensor data section starts.
content: Content,
}
impl GgufFile {
    /// Opens the GGUF file at `path`, memory-maps it and parses its header.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or mapped (the message
    /// includes the path), or if `Content::read` fails on the mapped bytes.
    pub fn open(path: impl AsRef<Path>) -> CandleResult<Self> {
        let path_ref = path.as_ref();
        let file = File::open(path_ref).map_err(|e| {
            CandleError::Msg(format!(
                "failed to open GGUF file '{}': {e}",
                path_ref.display()
            ))
        })?;
        // SAFETY: `Mmap::map` is unsafe because the mapping is only valid while
        // the underlying file is not truncated or rewritten. This type only ever
        // reads through the map and relies on the model file staying unchanged
        // on disk for as long as the `GgufFile` is alive.
        let mmap = unsafe { Mmap::map(&file) }.map_err(|e| {
            CandleError::Msg(format!(
                "failed to mmap GGUF file '{}': {e}",
                path_ref.display()
            ))
        })?;
        // The cursor only borrows the map while the header is being parsed.
        let mut cursor = Cursor::new(&mmap[..]);
        let content = Content::read(&mut cursor)?;
        Ok(Self { mmap, content })
    }

    /// Returns the parsed GGUF header.
    pub fn content(&self) -> &Content {
        &self.content
    }

    /// Returns the `general.architecture` metadata entry as a string.
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as [`Self::metadata_string`].
    pub fn architecture(&self) -> CandleResult<&str> {
        self.metadata_string("general.architecture")
    }

    /// Looks up the raw metadata value stored under `key`, if any.
    pub fn metadata(&self, key: &str) -> Option<&Value> {
        self.content.metadata.get(key)
    }

    /// Returns the metadata entry `key` as a string slice.
    ///
    /// # Errors
    ///
    /// Returns an error if the key is missing or `Value::to_string` fails
    /// (presumably when the value is not a string).
    pub fn metadata_string(&self, key: &str) -> CandleResult<&str> {
        self.require_metadata(key)?.to_string().map(|s| s.as_str())
    }

    /// Returns the metadata entry `key` via `Value::to_u32`.
    ///
    /// # Errors
    ///
    /// Returns an error if the key is missing or the conversion fails.
    pub fn metadata_u32(&self, key: &str) -> CandleResult<u32> {
        self.require_metadata(key)?.to_u32()
    }

    /// Returns the metadata entry `key` via `Value::to_u64`.
    ///
    /// # Errors
    ///
    /// Returns an error if the key is missing or the conversion fails.
    pub fn metadata_u64(&self, key: &str) -> CandleResult<u64> {
        self.require_metadata(key)?.to_u64()
    }

    /// Returns the metadata entry `key` via `Value::to_f32`.
    ///
    /// # Errors
    ///
    /// Returns an error if the key is missing or the conversion fails.
    pub fn metadata_f32(&self, key: &str) -> CandleResult<f32> {
        self.require_metadata(key)?.to_f32()
    }

    /// Returns the metadata entry `key` via `Value::to_bool`.
    ///
    /// # Errors
    ///
    /// Returns an error if the key is missing or the conversion fails.
    pub fn metadata_bool(&self, key: &str) -> CandleResult<bool> {
        self.require_metadata(key)?.to_bool()
    }

    /// Like [`Self::metadata`], but turns a missing key into an error that
    /// names the key.
    fn require_metadata(&self, key: &str) -> CandleResult<&Value> {
        self.metadata(key)
            .ok_or_else(|| CandleError::Msg(format!("GGUF metadata key missing: '{key}'")))
    }

    /// Number of tensors listed in the header.
    pub fn tensor_count(&self) -> usize {
        self.content.tensor_infos.len()
    }

    /// Iterates over the names of all tensors listed in the header.
    pub fn tensor_names(&self) -> impl Iterator<Item = &str> {
        self.content.tensor_infos.keys().map(|s| s.as_str())
    }

    /// Returns the header entry for the tensor called `name`, if present.
    pub fn tensor_info(&self, name: &str) -> Option<&TensorInfo> {
        self.content.tensor_infos.get(name)
    }

    /// Returns `true` if the header lists a tensor called `name`.
    pub fn has_tensor(&self, name: &str) -> bool {
        self.content.tensor_infos.contains_key(name)
    }

    /// Reads the tensor called `name` for `device` by handing `Content::tensor`
    /// a fresh cursor over the mapped file.
    ///
    /// # Errors
    ///
    /// Propagates whatever error `Content::tensor` reports.
    pub fn read_tensor(&self, name: &str, device: &Device) -> CandleResult<QTensor> {
        let mut cursor = Cursor::new(&self.mmap[..]);
        self.content.tensor(&mut cursor, name, device)
    }

    /// Returns the complete mapped file as a byte slice.
    pub fn mmap_bytes(&self) -> &[u8] {
        &self.mmap[..]
    }

    /// Returns the raw bytes of the tensor called `name` as a slice of the
    /// mapped file.
    ///
    /// Returns `None` in exactly the cases where [`Self::tensor_byte_range`]
    /// does.
    pub fn tensor_byte_slice(&self, name: &str) -> Option<&[u8]> {
        // Delegate so the slice and range accessors can never disagree.
        let (start, len) = self.tensor_byte_range(name)?;
        self.mmap.get(start..)?.get(..len)
    }

    /// Returns `(start, len)` for the tensor called `name`: the absolute byte
    /// offset of its data inside the mapped file and its length in bytes.
    ///
    /// Returns `None` if the tensor is unknown, if its element count is not a
    /// whole number of blocks, if any size/offset computation overflows, or if
    /// the resulting range extends past the end of the mapped file.
    pub fn tensor_byte_range(&self, name: &str) -> Option<(usize, usize)> {
        let info = self.content.tensor_infos.get(name)?;
        let elem_count = info.shape.elem_count();
        let block_size = info.ggml_dtype.block_size();
        if !elem_count.is_multiple_of(block_size) {
            return None;
        }
        // `checked_div` also covers a zero block size instead of panicking.
        let size_in_bytes = elem_count
            .checked_div(block_size)?
            .checked_mul(info.ggml_dtype.type_size())?;
        // Offsets come from the file header, so do the arithmetic checked and
        // in `u64` before narrowing anything to `usize`.
        let abs_start = self
            .content
            .tensor_data_offset
            .checked_add(info.offset)?;
        // usize -> u64 is lossless on every supported pointer width.
        let abs_end = abs_start.checked_add(size_in_bytes as u64)?;
        if abs_end > self.mmap.len() as u64 {
            return None;
        }
        // abs_start <= abs_end <= mmap.len(), so it fits in `usize`.
        Some((abs_start as usize, size_in_bytes))
    }
}
/// Compact debug view: prints summary counts and sizes rather than dumping the
/// mapped bytes or the full header.
impl std::fmt::Debug for GgufFile {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { mmap, content } = self;
        let mut summary = f.debug_struct("GgufFile");
        // Total size of the mapped file.
        summary.field("size_bytes", &mmap.len());
        // Only the number of entries is shown, not the entries themselves.
        summary.field("metadata_keys", &content.metadata.len());
        summary.field("tensor_count", &content.tensor_infos.len());
        summary.field("tensor_data_offset", &content.tensor_data_offset);
        summary.finish()
    }
}