// pmetal_gguf — crate root (lib.rs)
1//! GGUF file format implementation.
2//!
3//! GGUF (GGML Universal Format) is a file format for storing models
4//! for inference with GGML-based executors like llama.cpp and Ollama.
5//!
6//! This crate provides:
7//! - Types representing the GGUF format
8//! - A reader for loading GGUF files
9//! - A writer for creating GGUF files
10//! - Dequantization routines for quantized tensors
11//!
12//! # Example
13//!
14//! ```ignore
15//! use pmetal_gguf::{GgufContent, dequant};
16//!
17//! // Read GGUF file
18//! let content = GgufContent::from_file("model.gguf")?;
19//!
20//! // Get architecture
21//! if let Some(arch) = content.architecture() {
22//!     println!("Model architecture: {}", arch);
23//! }
24//!
25//! // Read and dequantize a tensor
26//! let mut file = std::fs::File::open("model.gguf")?;
27//! let info = content.get_tensor_info("token_embd.weight").unwrap();
28//! let data = content.read_tensor_data(&mut file, "token_embd.weight")?;
29//! let shape: Vec<i32> = info.dimensions.iter().map(|&d| d as i32).collect();
30//! let floats = dequant::dequantize(&data, info.dtype, &shape)?;
31//! ```
32
33#![warn(missing_docs)]
34
35pub mod config;
36pub mod dequant;
37pub mod dynamic;
38pub mod imatrix;
39pub mod iq_quants;
40pub mod k_quants;
41pub mod quantize;
42pub mod reader;
43mod types;
44pub mod vec_dot;
45mod writer;
46
47pub use reader::{
48    GgufContent, GgufReadError, GgufVersion, MAX_ARRAY_LENGTH, MAX_METADATA_COUNT,
49    MAX_STRING_LENGTH, MAX_TENSOR_COUNT,
50};
51pub use types::*;
52pub use writer::*;